diff options
458 files changed, 13810 insertions, 6643 deletions
diff --git a/Documentation/devicetree/bindings/arc/archs-idu-intc.txt b/Documentation/devicetree/bindings/arc/archs-idu-intc.txt new file mode 100644 index 000000000000..0dcb7c7d3e40 --- /dev/null +++ b/Documentation/devicetree/bindings/arc/archs-idu-intc.txt @@ -0,0 +1,46 @@ +* ARC-HS Interrupt Distribution Unit + + This optional 2nd level interrupt controller can be used in SMP configurations for + dynamic IRQ routing, load balancing of common/external IRQs towards core intc. + +Properties: + +- compatible: "snps,archs-idu-intc" +- interrupt-controller: This is an interrupt controller. +- interrupt-parent: <reference to parent core intc> +- #interrupt-cells: Must be <2>. +- interrupts: <...> specifies the upstream core irqs + + First cell specifies the "common" IRQ from peripheral to IDU + Second cell specifies the irq distribution mode to cores + 0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + + intc accessed via the special ARC AUX register interface, hence "reg" property + is not specified. + +Example: + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + + /* + * <hwirq distribution> + * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + */ + #interrupt-cells = <2>; + + /* upstream core irqs: downstream these are "COMMON" irq 0,1.. */ + interrupts = <24 25 26 27 28 29 30 31>; + }; + + some_device: serial@c0fc1000 { + interrupt-parent = <&idu_intc>; + interrupts = <0 0>; /* upstream idu IRQ #24, Round Robin */ + }; diff --git a/Documentation/devicetree/bindings/arc/archs-intc.txt b/Documentation/devicetree/bindings/arc/archs-intc.txt new file mode 100644 index 000000000000..69f326d6a5ad --- /dev/null +++ b/Documentation/devicetree/bindings/arc/archs-intc.txt @@ -0,0 +1,22 @@ +* ARC-HS incore Interrupt Controller (Provided by cores implementing ARCv2 ISA) + +Properties: + +- compatible: "snps,archs-intc" +- interrupt-controller: This is an interrupt controller. +- #interrupt-cells: Must be <1>. + + Single Cell "interrupts" property of a device specifies the IRQ number + between 16 to 256 + + intc accessed via the special ARC AUX register interface, hence "reg" property + is not specified. + +Example: + + intc: interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + interrupts = <16 17 18 19 20 21 22 23 24 25>; + }; diff --git a/Documentation/devicetree/bindings/arc/axs101.txt b/Documentation/devicetree/bindings/arc/axs101.txt new file mode 100644 index 000000000000..48290d5178b5 --- /dev/null +++ b/Documentation/devicetree/bindings/arc/axs101.txt @@ -0,0 +1,7 @@ +Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings +--------------------------------------------------------------------------- + +SDP Main Board with an AXC001 CPU Card hoisting ARC700 core in silicon + +Required root node properties: + - compatible = "snps,axs101", "snps,arc-sdp"; diff --git a/Documentation/devicetree/bindings/arc/axs103.txt b/Documentation/devicetree/bindings/arc/axs103.txt new file mode 100644 index 000000000000..6eea862e72b9 --- /dev/null +++ b/Documentation/devicetree/bindings/arc/axs103.txt @@ -0,0 +1,8 @@ +Synopsys DesignWare ARC Software Development Platforms Device Tree Bindings +--------------------------------------------------------------------------- + +SDP Main Board with an AXC003 FPGA Card which can contain various flavours of +HS38x cores. + +Required root node properties: + - compatible = "snps,axs103", "snps,arc-sdp"; diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index 750d577e8083..f5a8ca29aff0 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -1,7 +1,7 @@ * Marvell Armada 370 / Armada XP Ethernet Controller (NETA) Required properties: -- compatible: should be "marvell,armada-370-neta". +- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta". - reg: address and length of the register set for the device. - interrupts: interrupt for the device - phy: See ethernet.txt file in the same directory. diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 5a5a05582b58..8146e9fd5ffc 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -236,10 +236,10 @@ Removed Mount Options Name Removed ---- ------- - delaylog/nodelaylog v3.20 - ihashsize v3.20 - irixsgid v3.20 - osyncisdsync/osyncisosync v3.20 + delaylog/nodelaylog v4.0 + ihashsize v4.0 + irixsgid v4.0 + osyncisdsync/osyncisosync v4.0 sysctls @@ -346,5 +346,5 @@ Removed Sysctls Name Removed ---- ------- - fs.xfs.xfsbufd_centisec v3.20 - fs.xfs.age_buffer_centisecs v3.20 + fs.xfs.xfsbufd_centisec v4.0 + fs.xfs.age_buffer_centisecs v4.0 @@ -52,7 +52,6 @@ $(obj)/$(bounds-file): kernel/bounds.s FORCE timeconst-file := include/generated/timeconst.h -#always += $(timeconst-file) targets += $(timeconst-file) quiet_cmd_gentimeconst = GEN $@ diff --git a/MAINTAINERS b/MAINTAINERS index af61ea8d2162..058b0fbc52ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9828,6 +9828,13 @@ F: arch/arc/ F: Documentation/devicetree/bindings/arc/ F: drivers/tty/serial/arc_uart.c +SYNOPSYS ARC SDP platform support +M: Alexey Brodkin <abrodkin@synopsys.com> +S: Supported +F: arch/arc/plat-axs10x +F: arch/arc/boot/dts/ax* +F: Documentation/devicetree/bindings/arc/axs10* + SYSTEM CONFIGURATION (SYSCON) M: Lee Jones <lee.jones@linaro.org> M: Arnd Bergmann <arnd@arndb.de> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index df94ac1f75b6..e7cee0a5c56d 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -81,17 +81,37 @@ menu "ARC Architecture Configuration" menu "ARC Platform/SoC/Board" -source "arch/arc/plat-arcfpga/Kconfig" +source "arch/arc/plat-sim/Kconfig" source "arch/arc/plat-tb10x/Kconfig" +source "arch/arc/plat-axs10x/Kconfig" #New platform adds here endmenu +choice + prompt "ARC Instruction Set" + default ISA_ARCOMPACT + +config ISA_ARCOMPACT + bool "ARCompact ISA" + help + The original ARC ISA of ARC600/700 cores + +config ISA_ARCV2 + bool "ARC ISA v2" + help + ISA for the Next Generation ARC-HS cores + +endchoice + menu "ARC CPU Configuration" choice prompt "ARC Core" - default ARC_CPU_770 + default ARC_CPU_770 if ISA_ARCOMPACT + default ARC_CPU_HS if ISA_ARCV2 + +if ISA_ARCOMPACT config ARC_CPU_750D bool "ARC750D" @@ -100,7 +120,7 @@ config ARC_CPU_750D config ARC_CPU_770 bool "ARC770" - select ARC_CPU_REL_4_10 + select ARC_HAS_SWAPE help Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: @@ -109,6 +129,27 @@ config ARC_CPU_770 -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr +endif #ISA_ARCOMPACT + +config ARC_CPU_HS + bool "ARC-HS" + depends on ISA_ARCV2 + help + Support for ARC HS38x Cores based on ARCv2 ISA + The notable features are: + - SMP configurations of upto 4 core with coherency + - Optional L2 Cache and IO-Coherency + - Revised Interrupt Architecture (multiple priorites, reg banks, + auto stack switch, auto regfile save/restore) + - MMUv4 (PIPT dcache, Huge Pages) + - Instructions for + * 64bit load/store: LDD, STD + * Hardware assisted divide/remainder: DIV, REM + * Function prologue/epilogue: ENTER_S, LEAVE_S + * IRQ enable/disable: CLRI, SETI + * pop count: FFS, FLS + * SETcc, BMSKN, XBFU... + endchoice config CPU_BIG_ENDIAN @@ -117,17 +158,13 @@ config CPU_BIG_ENDIAN help Build kernel for Big Endian Mode of ARC CPU -# If a platform can't work with 0x8000_0000 based dma_addr_t -config ARC_PLAT_NEEDS_CPU_TO_DMA - bool - config SMP - bool "Symmetric Multi-Processing (Incomplete)" + bool "Symmetric Multi-Processing" default n + select ARC_HAS_COH_CACHES if ISA_ARCV2 + select ARC_MCIP if ISA_ARCV2 help - This enables support for systems with more than one CPU. If you have - a system with only one CPU, say N. If you have a system with more - than one CPU, say Y. + This enables support for systems with more than one CPU. if SMP @@ -137,13 +174,20 @@ config ARC_HAS_COH_CACHES config ARC_HAS_REENTRANT_IRQ_LV2 def_bool n -endif +config ARC_MCIP + bool "ARConnect Multicore IP (MCIP) Support " + depends on ISA_ARCV2 + help + This IP block enables SMP in ARC-HS38 cores. + It provides for cross-core interrupts, multi-core debug + hardware semaphores, shared memory,.... config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 - depends on SMP - default "2" + default "4" + +endif #SMP menuconfig ARC_CACHE bool "Enable Cache Support" @@ -185,7 +229,7 @@ config ARC_CACHE_PAGES config ARC_CACHE_VIPT_ALIASING bool "Support VIPT Aliasing D$" - depends on ARC_HAS_DCACHE + depends on ARC_HAS_DCACHE && ISA_ARCOMPACT default n endif #ARC_CACHE @@ -226,9 +270,10 @@ config ARC_HAS_HW_MPY Multipler. Otherwise software multipy lib is used choice - prompt "ARC700 MMU Version" + prompt "MMU Version" default ARC_MMU_V3 if ARC_CPU_770 default ARC_MMU_V2 if ARC_CPU_750D + default ARC_MMU_V4 if ARC_CPU_HS config ARC_MMU_V1 bool "MMU v1" @@ -249,6 +294,10 @@ config ARC_MMU_V3 Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) +config ARC_MMU_V4 + bool "MMU v4" + depends on ISA_ARCV2 + endchoice @@ -271,6 +320,8 @@ config ARC_PAGE_SIZE_4K endchoice +if ISA_ARCOMPACT + config ARC_COMPACT_IRQ_LEVELS bool "ARCompact IRQ Priorities: High(2)/Low(1)" default n @@ -290,7 +341,7 @@ config ARC_IRQ5_LV2 config ARC_IRQ6_LV2 bool -endif +endif #ARC_COMPACT_IRQ_LEVELS config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" @@ -303,32 +354,53 @@ config ARC_FPU_SAVE_RESTORE based on actual usage of FPU by a task. Thus our implemn does this for all tasks in system. +endif #ISA_ARCOMPACT + config ARC_CANT_LLSC def_bool n -menuconfig ARC_CPU_REL_4_10 - bool "Enable support for Rel 4.10 features" - default n - help - -ARC770 (and dependent features) enabled - -ARC750 also shares some of the new features with 770 - config ARC_HAS_LLSC bool "Insn: LLOCK/SCOND (efficient atomic ops)" default y - depends on ARC_CPU_770 && !ARC_CANT_LLSC + depends on !ARC_CPU_750D && !ARC_CANT_LLSC config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" default y - depends on ARC_CPU_REL_4_10 -config ARC_HAS_RTSC - bool "Insn: RTSC (64-bit r/o cycle counter)" +if ISA_ARCV2 + +config ARC_HAS_LL64 + bool "Insn: 64bit LDD/STD" + help + Enable gcc to generate 64-bit load/store instructions + ISA mandates even/odd registers to allow encoding of two + dest operands with 2 possible source operands. default y - depends on ARC_CPU_REL_4_10 + +config ARC_HAS_RTC + bool "Local 64-bit r/o cycle counter" + default n depends on !SMP +config ARC_HAS_GRTC + bool "SMP synchronized 64-bit cycle counter" + default y + depends on SMP + +config ARC_NUMBER_OF_INTERRUPTS + int "Number of interrupts" + range 8 240 + default 32 + help + This defines the number of interrupts on the ARCv2HS core. + It affects the size of vector table. + The initial 8 IRQs are fixed (Timer, ICI etc) and although configurable + in hardware, it keep things simple for Linux to assume they are always + present. + +endif # ISA_ARCV2 + endmenu # "ARC CPU Configuration" config LINUX_LINK_BASE @@ -354,8 +426,10 @@ config ARC_CURR_IN_REG config ARC_EMUL_UNALIGNED bool "Emulate unaligned memory access (userspace only)" + default N select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_ARCH_UNALIGN_ALLOW + depends on ISA_ARCOMPACT help This enables misaligned 16 & 32 bit memory access from user space. Use ONLY-IF-ABS-NECESSARY as it will be very slow and also can hide @@ -378,9 +452,10 @@ menuconfig ARC_DBG bool "ARC debugging" default y +if ARC_DBG + config ARC_DW2_UNWIND bool "Enable DWARF specific kernel stack unwind" - depends on ARC_DBG default y select KALLSYMS help @@ -394,18 +469,38 @@ config ARC_DW2_UNWIND config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" - depends on ARC_DBG default n config ARC_DBG_TLB_MISS_COUNT bool "Profile TLB Misses" default n select DEBUG_FS - depends on ARC_DBG help Counts number of I and D TLB Misses and exports them via Debugfs The counters can be cleared via Debugfs as well +if SMP + +config ARC_IPI_DBG + bool "Debug Inter Core interrupts" + default n + +endif + +endif + +config ARC_UBOOT_SUPPORT + bool "Support uboot arg Handling" + default n + help + ARC Linux by default checks for uboot provided args as pointers to + external cmdline or DTB. This however breaks in absence of uboot, + when booting from Metaware debugger directly, as the registers are + not zeroed out on reset by mdb and/or ARCv2 based cores. The bogus + registers look like uboot args to kernel which then chokes. + So only enable the uboot arg checking/processing if users are sure + of uboot being in play. + config ARC_BUILTIN_DTB_NAME string "Built in DTB" help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index db72fec0e160..6107062c0111 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -9,12 +9,14 @@ UTS_MACHINE := arc ifeq ($(CROSS_COMPILE),) -CROSS_COMPILE := arc-linux-uclibc- +CROSS_COMPILE := arc-linux- endif KBUILD_DEFCONFIG := nsim_700_defconfig -cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__ +cflags-y += -fno-common -pipe -fno-builtin -D__linux__ +cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 +cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -33,7 +35,11 @@ cflags-$(atleast_gcc44) += -fsection-anchors cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape -cflags-$(CONFIG_ARC_HAS_RTSC) += -mrtsc + +ifndef CONFIG_ARC_HAS_LL64 +cflags-$(CONFIG_ISA_ARCV2) += -mno-ll64 +endif + cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok @@ -81,8 +87,9 @@ core-y += arch/arc/ # w/o this dtb won't embed into kernel binary core-y += arch/arc/boot/dts/ -core-$(CONFIG_ARC_PLAT_FPGA_LEGACY) += arch/arc/plat-arcfpga/ -core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ +core-$(CONFIG_ARC_PLAT_SIM) += arch/arc/plat-sim/ +core-$(CONFIG_ARC_PLAT_TB10X) += arch/arc/plat-tb10x/ +core-$(CONFIG_ARC_PLAT_AXS10X) += arch/arc/plat-axs10x/ drivers-$(CONFIG_OPROFILE) += arch/arc/oprofile/ diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile index faf240e29ec2..b0e3f19bbd07 100644 --- a/arch/arc/boot/dts/Makefile +++ b/arch/arc/boot/dts/Makefile @@ -1,5 +1,5 @@ # Built-in dtb -builtindtb-y := angel4 +builtindtb-y := nsim_700 ifneq ($(CONFIG_ARC_BUILTIN_DTB_NAME),"") builtindtb-y := $(patsubst "%",%,$(CONFIG_ARC_BUILTIN_DTB_NAME)) diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi new file mode 100644 index 000000000000..a5e2726a067e --- /dev/null +++ b/arch/arc/boot/dts/axc001.dtsi @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device tree for AXC001 770D/EM6/AS221 CPU card + * Note that this file only supports the 770D CPU + */ + +/ { + compatible = "snps,arc"; + clock-frequency = <750000000>; /* 750 MHZ */ + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + cpu_intc: arc700-intc@cpu { + compatible = "snps,arc700-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + /* + * this GPIO block ORs all interrupts on CPU card (creg,..) + * to uplink only 1 IRQ to ARC core intc + */ + dw-apb-gpio@0x2000 { + compatible = "snps,dw-apb-gpio"; + reg = < 0x2000 0x80 >; + #address-cells = <1>; + #size-cells = <0>; + + ictl_intc: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpu_intc>; + interrupts = <15>; + }; + }; + + debug_uart: dw-apb-uart@0x5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33333000>; + interrupt-parent = <&ictl_intc>; + interrupts = <19 4>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + arcpmu0: pmu { + compatible = "snps,arc700-pct"; + }; + }; + + /* + * This INTC is actually connected to DW APB GPIO + * which acts as a wire between MB INTC and CPU INTC. + * GPIO INTC is configured in platform init code + * and here we mimic direct connection from MB INTC to + * CPU INTC, thus we set "interrupts = <7>" instead of + * "interrupts = <12>" + * + * This intc actually resides on MB, but we move it here to + * avoid duplicating the MB dtsi file given that IRQ from + * this intc to cpu intc are different for axs101 and axs103 + */ + mb_intc: dw-apb-ictl@0xe0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = < 7 >; + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x00000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi new file mode 100644 index 000000000000..15c8d6226c9d --- /dev/null +++ b/arch/arc/boot/dts/axc003.dtsi @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device tree for AXC003 CPU card: HS38x UP configuration + */ + +/ { + compatible = "snps,arc"; + clock-frequency = <75000000>; + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + cpu_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + /* + * this GPIO block ORs all interrupts on CPU card (creg,..) + * to uplink only 1 IRQ to ARC core intc + */ + dw-apb-gpio@0x2000 { + compatible = "snps,dw-apb-gpio"; + reg = < 0x2000 0x80 >; + #address-cells = <1>; + #size-cells = <0>; + + ictl_intc: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&cpu_intc>; + interrupts = <25>; + }; + }; + + debug_uart: dw-apb-uart@0x5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33333000>; + interrupt-parent = <&ictl_intc>; + interrupts = <2 4>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupt-parent = <&cpu_intc>; + interrupts = <20>; + }; + }; + + /* + * This INTC is actually connected to DW APB GPIO + * which acts as a wire between MB INTC and CPU INTC. + * GPIO INTC is configured in platform init code + * and here we mimic direct connection from MB INTC to + * CPU INTC, thus we set "interrupts = <7>" instead of + * "interrupts = <12>" + * + * This intc actually resides on MB, but we move it here to + * avoid duplicating the MB dtsi file given that IRQ from + * this intc to cpu intc are different for axs101 and axs103 + */ + mb_intc: dw-apb-ictl@0xe0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = < 24 >; + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x00000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi new file mode 100644 index 000000000000..199d42820eca --- /dev/null +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device tree for AXC003 CPU card: HS38x2 (Dual Core) with IDU intc + */ + +/ { + compatible = "snps,arc"; + clock-frequency = <75000000>; + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + cpu_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + + /* + * <hwirq distribution> + * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + */ + #interrupt-cells = <2>; + + /* + * upstream irqs to core intc - downstream these are + * "COMMON" irq 0,1.. + */ + interrupts = <24 25>; + }; + + /* + * this GPIO block ORs all interrupts on CPU card (creg,..) + * to uplink only 1 IRQ to ARC core intc + */ + dw-apb-gpio@0x2000 { + compatible = "snps,dw-apb-gpio"; + reg = < 0x2000 0x80 >; + #address-cells = <1>; + #size-cells = <0>; + + ictl_intc: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&idu_intc>; + + /* + * cmn irq 1 -> cpu irq 25 + * Distribute to cpu0 only + */ + interrupts = <1 1>; + }; + }; + + debug_uart: dw-apb-uart@0x5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <33333000>; + interrupt-parent = <&ictl_intc>; + interrupts = <2 4>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupt-parent = <&cpu_intc>; + interrupts = <20>; + }; + }; + + /* + * This INTC is actually connected to DW APB GPIO + * which acts as a wire between MB INTC and CPU INTC. + * GPIO INTC is configured in platform init code + * and here we mimic direct connection from MB INTC to + * CPU INTC, thus we set "interrupts = <0 1>" instead of + * "interrupts = <12>" + * + * This intc actually resides on MB, but we move it here to + * avoid duplicating the MB dtsi file given that IRQ from + * this intc to cpu intc are different for axs101 and axs103 + */ + mb_intc: dw-apb-ictl@0xe0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&idu_intc>; + interrupts = <0 1>; /* cmn irq 0 -> cpu irq 24 + distribute to cpu0 only */ + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x00000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts new file mode 100644 index 000000000000..3f9b0582e734 --- /dev/null +++ b/arch/arc/boot/dts/axs101.dts @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * ARC AXS101 S/W development platform + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "axc001.dtsi" +/include/ "axs10x_mb.dtsi" + +/ { + compatible = "snps,axs101", "snps,arc-sdp"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0"; + }; +}; diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts new file mode 100644 index 000000000000..e6d0e31ea299 --- /dev/null +++ b/arch/arc/boot/dts/axs103.dts @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device Tree for AXS103 SDP with AXS10X Main Board and + * AXC003 FPGA Card (with UP bitfile) + */ +/dts-v1/; + +/include/ "axc003.dtsi" +/include/ "axs10x_mb.dtsi" + +/ { + compatible = "snps,axs103", "snps,arc-sdp"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1"; + }; +}; diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts new file mode 100644 index 000000000000..f999fef5a60a --- /dev/null +++ b/arch/arc/boot/dts/axs103_idu.dts @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device Tree for AXS103 SDP with AXS10X Main Board and + * AXC003 FPGA Card (with SMP bitfile) + */ +/dts-v1/; + +/include/ "axc003_idu.dtsi" +/include/ "axs10x_mb.dtsi" + +/ { + compatible = "snps,axs103", "snps,arc-sdp"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=ttyS3,115200n8 debug print-fatal-signals=1"; + }; +}; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi new file mode 100644 index 000000000000..f3db32154973 --- /dev/null +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -0,0 +1,224 @@ +/* + * Support for peripherals on the AXS10x mainboard + * + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/ { + axs10x_mb { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0xe0000000 0x10000000>; + interrupt-parent = <&mb_intc>; + + clocks { + i2cclk: i2cclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + apbclk: apbclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + mmcclk: mmcclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + }; + + ethernet@0x18000 { + #interrupt-cells = <1>; + compatible = "snps,dwmac"; + reg = < 0x18000 0x2000 >; + interrupts = < 4 >; + interrupt-names = "macirq"; + phy-mode = "rgmii"; + snps,pbl = < 32 >; + clocks = <&apbclk>; + clock-names = "stmmaceth"; + }; + + ehci@0x40000 { + compatible = "generic-ehci"; + reg = < 0x40000 0x100 >; + interrupts = < 8 >; + }; + + ohci@0x60000 { + compatible = "generic-ohci"; + reg = < 0x60000 0x100 >; + interrupts = < 8 >; + }; + + /* + * According to DW Mobile Storage databook it is required + * to use "Hold Register" if card is enumerated in SDR12 or + * SDR25 modes. + * + * Utilization of "Hold Register" is already implemented via + * dw_mci_pltfm_prepare_command() which in its turn gets + * used through dw_mci_drv_data->prepare_command call-back. + * This call-back is used in Altera Socfpga platform and so + * we may reuse it saying that we're compatible with their + * "altr,socfpga-dw-mshc". + * + * Most probably "Hold Register" utilization is platform- + * independent requirement which means that single unified + * "snps,dw-mshc" should be enough for all users of DW MMC once + * dw_mci_pltfm_prepare_command() is used in generic platform + * code. + */ + mmc@0x15000 { + compatible = "altr,socfpga-dw-mshc"; + reg = < 0x15000 0x400 >; + num-slots = < 1 >; + fifo-depth = < 16 >; + card-detect-delay = < 200 >; + clocks = <&apbclk>, <&mmcclk>; + clock-names = "biu", "ciu"; + interrupts = < 7 >; + bus-width = < 4 >; + }; + + uart@0x20000 { + compatible = "snps,dw-apb-uart"; + reg = <0x20000 0x100>; + clock-frequency = <33333333>; + interrupts = <17>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + uart@0x21000 { + compatible = "snps,dw-apb-uart"; + reg = <0x21000 0x100>; + clock-frequency = <33333333>; + interrupts = <18>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + /* UART muxed with USB data port (ttyS3) */ + uart@0x22000 { + compatible = "snps,dw-apb-uart"; + reg = <0x22000 0x100>; + clock-frequency = <33333333>; + interrupts = <19>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + i2c@0x1d000 { + compatible = "snps,designware-i2c"; + reg = <0x1d000 0x100>; + clock-frequency = <400000>; + clocks = <&i2cclk>; + interrupts = <14>; + }; + + i2c@0x1e000 { + compatible = "snps,designware-i2c"; + reg = <0x1e000 0x100>; + clock-frequency = <400000>; + clocks = <&i2cclk>; + interrupts = <15>; + }; + + i2c@0x1f000 { + compatible = "snps,designware-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x1f000 0x100>; + clock-frequency = <400000>; + clocks = <&i2cclk>; + interrupts = <16>; + + eeprom@0x54{ + compatible = "24c01"; + reg = <0x54>; + pagesize = <0x8>; + }; + + eeprom@0x57{ + compatible = "24c04"; + reg = <0x57>; + pagesize = <0x8>; + }; + }; + + gpio0:gpio@13000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x13000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + + gpio0_banka: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <32>; + reg = <0>; + }; + + gpio0_bankb: gpio-controller@1 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <1>; + }; + + gpio0_bankc: gpio-controller@2 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <2>; + }; + }; + + gpio1:gpio@14000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x14000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + + gpio1_banka: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <30>; + reg = <0>; + }; + + gpio1_bankb: gpio-controller@1 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <10>; + reg = <1>; + }; + + gpio1_bankc: gpio-controller@2 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <8>; + reg = <2>; + }; + }; + }; +}; diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/nsim_700.dts index 3b076fbd8366..105a0017023f 100644 --- a/arch/arc/boot/dts/angel4.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -10,7 +10,7 @@ /include/ "skeleton.dtsi" / { - compatible = "snps,arc-angel4"; + compatible = "snps,nsim"; clock-frequency = <80000000>; /* 80 MHZ */ #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts new file mode 100644 index 000000000000..911f069e0540 --- /dev/null +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton.dtsi" + +/ { + compatible = "snps,nsim_hs"; + interrupt-parent = <&core_intc>; + + chosen { + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + }; + + aliases { + serial0 = &arcuart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + arcuart0: serial@c0fc1000 { + compatible = "snps,arc-uart"; + reg = <0xc0fc1000 0x100>; + interrupts = <24>; + clock-frequency = <80000000>; + current-speed = <115200>; + status = "okay"; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts new file mode 100644 index 000000000000..46ab31975612 --- /dev/null +++ b/arch/arc/boot/dts/nsim_hs_idu.dts @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton.dtsi" + +/ { + compatible = "snps,nsim_hs"; + interrupt-parent = <&core_intc>; + + chosen { + bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; + }; + + aliases { + serial0 = &arcuart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + + /* + * <hwirq distribution> + * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + */ + #interrupt-cells = <2>; + + /* + * upstream irqs to core intc - downstream these are + * "COMMON" irq 0,1.. + */ + interrupts = <24 25 26 27 28 29 30 31>; + }; + + arcuart0: serial@c0fc1000 { + compatible = "snps,arc-uart"; + reg = <0xc0fc1000 0x100>; + interrupt-parent = <&idu_intc>; + interrupts = <0 0>; + clock-frequency = <80000000>; + current-speed = <115200>; + status = "okay"; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts new file mode 100644 index 000000000000..d64a96f8515a --- /dev/null +++ b/arch/arc/boot/dts/nsimosci_hs.dts @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton.dtsi" + +/ { + compatible = "snps,nsimosci_hs"; + clock-frequency = <20000000>; /* 20 MHZ */ + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + chosen { + /* this is for console on PGU */ + /* bootargs = "console=tty0 consoleblank=0"; */ + /* this is for console on serial */ + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + uart0: serial@f0000000 { + compatible = "ns8250"; + reg = <0xf0000000 0x2000>; + interrupts = <24>; + clock-frequency = <3686400>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + pgu0: pgu@f9000000 { + compatible = "snps,arcpgufb"; + reg = <0xf9000000 0x400>; + }; + + ps2: ps2@f9001000 { + compatible = "snps,arc_ps2"; + reg = <0xf9000400 0x14>; + interrupts = <27>; + interrupt-names = "arc_ps2_irq"; + }; + + eth0: ethernet@f0003000 { + compatible = "snps,oscilan"; + reg = <0xf0003000 0x44>; + interrupts = <25>, <26>; + interrupt-names = "rx", "tx"; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts new file mode 100644 index 000000000000..f6bf0ca95a57 --- /dev/null +++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "skeleton.dtsi" + +/ { + compatible = "snps,nsimosci_hs"; + clock-frequency = <5000000>; /* 5 MHZ */ + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&core_intc>; + + chosen { + /* this is for console on serial */ + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug"; + }; + + aliases { + serial0 = &uart0; + }; + + fpga { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + /* child and parent address space 1:1 mapped */ + ranges; + + core_intc: core-interrupt-controller { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; +/* interrupts = <16 17 18 19 20 21 22 23 24 25>; */ + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&core_intc>; + + /* + * <hwirq distribution> + * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + */ + #interrupt-cells = <2>; + + /* + * upstream irqs to core intc - downstream these are + * "COMMON" irq 0,1.. + */ + interrupts = <24 25 26 27 28 29 30 31>; + }; + + uart0: serial@f0000000 { + compatible = "ns8250"; + reg = <0xf0000000 0x2000>; + interrupt-parent = <&idu_intc>; + interrupts = <0 0>; /* cmn irq 0 -> cpu irq 24 + RR distribute to all cpus */ + clock-frequency = <3686400>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + no-loopback-test = <1>; + }; + + pgu0: pgu@f9000000 { + compatible = "snps,arcpgufb"; + reg = <0xf9000000 0x400>; + }; + + ps2: ps2@f9001000 { + compatible = "snps,arc_ps2"; + reg = <0xf9000400 0x14>; + interrupts = <3 0>; + interrupt-parent = <&idu_intc>; + interrupt-names = "arc_ps2_irq"; + }; + + eth0: ethernet@f0003000 { + compatible = "snps,oscilan"; + reg = <0xf0003000 0x44>; + interrupt-parent = <&idu_intc>; + interrupts = <1 2>, <2 2>; + interrupt-names = "rx", "tx"; + }; + + arcpct0: pct { + compatible = "snps,archs-pct"; + #interrupt-cells = <1>; + interrupts = <20>; + }; + }; +}; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi new file mode 100644 index 000000000000..9393fd902f0d --- /dev/null +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2013, 2014 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device tree for AXC003 CPU card: HS38x UP configuration (VDK version) + */ + +/ { + compatible = "snps,arc"; + clock-frequency = <50000000>; + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + cpu_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + debug_uart: dw-apb-uart@0x5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <2403200>; + interrupt-parent = <&cpu_intc>; + interrupts = <19>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + }; + + mb_intc: dw-apb-ictl@0xe0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + interrupts = < 18 >; + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x00000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi new file mode 100644 index 000000000000..9bee8ed09eb0 --- /dev/null +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2014, 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Device tree for AXC003 CPU card: + * HS38x2 (Dual Core) with IDU intc (VDK version) + */ + +/ { + compatible = "snps,arc"; + clock-frequency = <50000000>; + #address-cells = <1>; + #size-cells = <1>; + + cpu_card { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + + ranges = <0x00000000 0xf0000000 0x10000000>; + + cpu_intc: archs-intc@cpu { + compatible = "snps,archs-intc"; + interrupt-controller; + #interrupt-cells = <1>; + }; + + idu_intc: idu-interrupt-controller { + compatible = "snps,archs-idu-intc"; + interrupt-controller; + interrupt-parent = <&cpu_intc>; + + /* + * <hwirq distribution> + * distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 + */ + #interrupt-cells = <2>; + + interrupts = <24 25 26 27>; + }; + + debug_uart: dw-apb-uart@0x5000 { + compatible = "snps,dw-apb-uart"; + reg = <0x5000 0x100>; + clock-frequency = <2403200>; + interrupt-parent = <&idu_intc>; + interrupts = <2 0>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + }; + + mb_intc: dw-apb-ictl@0xe0012000 { + #interrupt-cells = <1>; + compatible = "snps,dw-apb-ictl"; + reg = < 0xe0012000 0x200 >; + interrupt-controller; + interrupt-parent = <&idu_intc>; + interrupts = < 0 0 >; + }; + + memory { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0x80000000 0x40000000>; + device_type = "memory"; + reg = <0x00000000 0x20000000>; /* 512MiB */ + }; +}; diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi new file mode 100644 index 000000000000..45cd665fca23 --- /dev/null +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -0,0 +1,93 @@ +/* + * Support for peripherals on the AXS10x mainboard (VDK version) + * + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/ { + axs10x_mb_vdk { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x00000000 0xe0000000 0x10000000>; + interrupt-parent = <&mb_intc>; + + clocks { + apbclk: apbclk { + compatible = "fixed-clock"; + clock-frequency = <50000000>; + #clock-cells = <0>; + }; + + }; + + ethernet@0x18000 { + #interrupt-cells = <1>; + compatible = "snps,dwmac"; + reg = < 0x18000 0x2000 >; + interrupts = < 4 >; + interrupt-names = "macirq"; + phy-mode = "rgmii"; + snps,phy-addr = < 0 >; // VDK model phy address is 0 + snps,pbl = < 32 >; + clocks = <&apbclk>; + clock-names = "stmmaceth"; + }; + + ehci@0x40000 { + compatible = "generic-ehci"; + reg = < 0x40000 0x100 >; + interrupts = < 8 >; + }; + + uart@0x20000 { + compatible = "snps,dw-apb-uart"; + reg = <0x20000 0x100>; + clock-frequency = <2403200>; + interrupts = <17>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + uart@0x21000 { + compatible = "snps,dw-apb-uart"; + reg = <0x21000 0x100>; + clock-frequency = <2403200>; + interrupts = <18>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + + uart@0x22000 { + compatible = "snps,dw-apb-uart"; + reg = <0x22000 0x100>; + clock-frequency = <2403200>; + interrupts = <19>; + baud = <115200>; + reg-shift = <2>; + reg-io-width = <4>; + }; + +/* PGU output directly sent to virtual LCD screen; hdmi controller not modelled */ + pgu@0x17000 { + compatible = "snps,arcpgufb"; + reg = <0x17000 0x400>; + clock-frequency = <51000000>; /* PGU'clock is initated in init function */ + /* interrupts = <5>; PGU interrupts not used, this vector is used for ps2 below */ + }; + +/* VDK has additional ps2 keyboard/mouse interface integrated in LCD screen model */ + ps2: ps2@e0017400 { + compatible = "snps,arc_ps2"; + reg = <0x17400 0x14>; + interrupts = <5>; + interrupt-names = "arc_ps2_irq"; + }; + }; +}; diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts new file mode 100644 index 000000000000..5d803dd2de59 --- /dev/null +++ b/arch/arc/boot/dts/vdk_hs38.dts @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * ARC HS38 Virtual Development Kit (VDK) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "vdk_axc003.dtsi" +/include/ "vdk_axs10x_mb.dtsi" + +/ { + compatible = "snps,axs103"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0"; + }; +}; diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts new file mode 100644 index 000000000000..031a5bc79b3e --- /dev/null +++ b/arch/arc/boot/dts/vdk_hs38_smp.dts @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * ARC HS38 Virtual Development Kit, SMP version (VDK) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/dts-v1/; + +/include/ "vdk_axc003_idu.dtsi" +/include/ "vdk_axs10x_mb.dtsi" + +/ { + compatible = "snps,axs103"; + + chosen { + bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0"; + }; +}; diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig new file mode 100644 index 000000000000..562dac6a7f78 --- /dev/null +++ b/arch/arc/configs/axs101_defconfig @@ -0,0 +1,111 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_MODULES=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS101=y +CONFIG_ARC_CACHE_LINE_SHIFT=5 +CONFIG_ARC_BUILTIN_DTB_NAME="axs101" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +# CONFIG_USB_NET_DRIVERS is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_MOUSE_SERIAL=y +CONFIG_MOUSE_SYNAPTICS_USB=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +# CONFIG_HWMON is not set +CONFIG_FB=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig new file mode 100644 index 000000000000..83a6d8d5cc58 --- /dev/null +++ b/arch/arc/configs/axs103_defconfig @@ -0,0 +1,117 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_MODULES=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="axs103" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_AXS=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +# CONFIG_USB_NET_DRIVERS is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_MOUSE_SERIAL=y +CONFIG_MOUSE_SYNAPTICS_USB=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +# CONFIG_HWMON is not set +CONFIG_FB=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig new file mode 100644 index 000000000000..f1e1c84e0dda --- /dev/null +++ b/arch/arc/configs/axs103_smp_defconfig @@ -0,0 +1,118 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_ARC_BUILTIN_DTB_NAME="axs103_idu" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_AXS=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +# CONFIG_USB_NET_DRIVERS is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_MOUSE_SERIAL=y +CONFIG_MOUSE_SYNAPTICS_USB=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y +# CONFIG_HWMON is not set +CONFIG_FB=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index ef4d3bc7b6c0..138f9d887957 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -1,4 +1,4 @@ -CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_CROSS_COMPILE="arc-linux-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set @@ -22,9 +22,8 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_FPGA_LEGACY=y -# CONFIG_ARC_HAS_RTSC is not set -CONFIG_ARC_BUILTIN_DTB_NAME="angel4" +CONFIG_ARC_PLAT_SIM=y +CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700" CONFIG_PREEMPT=y # CONFIG_COMPACTION is not set # CONFIG_CROSS_MEMORY_ATTACH is not set diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig new file mode 100644 index 000000000000..f761a7c70761 --- /dev/null +++ b/arch/arc/configs/nsim_hs_defconfig @@ -0,0 +1,64 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARC_PLAT_SIM=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_ARC=y +CONFIG_SERIAL_ARC_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +# CONFIG_DEBUG_PREEMPT is not set +CONFIG_XZ_DEC=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig new file mode 100644 index 000000000000..dc6f74f41283 --- /dev/null +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -0,0 +1,63 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARC_PLAT_SIM=y +CONFIG_ARC_BOARD_ML509=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_ARC=y +CONFIG_SERIAL_ARC_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_XZ_DEC=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index d2ac4e56ba1d..31e1d95764ff 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -1,4 +1,4 @@ -CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_CROSS_COMPILE="arc-linux-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set @@ -23,8 +23,7 @@ CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_ARC_PLAT_FPGA_LEGACY=y -# CONFIG_ARC_HAS_RTSC is not set +CONFIG_ARC_PLAT_SIM=y CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig new file mode 100644 index 000000000000..3fef0a210c56 --- /dev/null +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -0,0 +1,73 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARC_PLAT_SIM=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" +# CONFIG_COMPACTION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +CONFIG_NET_OSCI_LAN=y +CONFIG_INPUT_EVDEV=y +# CONFIG_MOUSE_PS2_ALPS is not set +# CONFIG_MOUSE_PS2_LOGIPS2PP is not set +# CONFIG_MOUSE_PS2_SYNAPTICS is not set +# CONFIG_MOUSE_PS2_TRACKPOINT is not set +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig new file mode 100644 index 000000000000..51784837daae --- /dev/null +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -0,0 +1,93 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" +# CONFIG_COMPAT_BRK is not set +CONFIG_KPROBES=y +CONFIG_MODULES=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARC_PLAT_SIM=y +CONFIG_ARC_BOARD_ML509=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +CONFIG_ARC_HAS_LL64=y +# CONFIG_ARC_HAS_RTSC is not set +CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu" +CONFIG_PREEMPT=y +# CONFIG_COMPACTION is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_NET_KEY=y +CONFIG_INET=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NET_OSCI_LAN=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_LIBPS2=y +CONFIG_SERIO_ARC_PS2=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_ARCPGU_RGB888=y +CONFIG_ARCPGU_DISPTYPE=0 +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FTRACE=y diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 6be6492442d6..3b4dc9cebcf1 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -1,4 +1,4 @@ -CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +CONFIG_CROSS_COMPILE="arc-linux-" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_DEFAULT_HOSTNAME="tb10x" CONFIG_SYSVIPC=y @@ -26,7 +26,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLOCK is not set CONFIG_ARC_PLAT_TB10X=y CONFIG_ARC_CACHE_LINE_SHIFT=5 -# CONFIG_ARC_HAS_RTSC is not set CONFIG_ARC_STACK_NONEXEC=y CONFIG_HZ=250 CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk" diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig new file mode 100644 index 000000000000..ef35ef3923dd --- /dev/null +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -0,0 +1,102 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_ARC_UBOOT_SUPPORT=y +CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38" +CONFIG_PREEMPT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_SLRAM=y +CONFIG_BLK_DEV_RAM=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_ARCPGU_RGB888=y +CONFIG_ARCPGU_DISPTYPE=0 +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_SERIAL=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig new file mode 100644 index 000000000000..634509e5e572 --- /dev/null +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -0,0 +1,104 @@ +CONFIG_CROSS_COMPILE="arc-linux-uclibc-" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_DEFAULT_HOSTNAME="ARCLinux" +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_ARC_PLAT_AXS10X=y +CONFIG_AXS103=y +CONFIG_ISA_ARCV2=y +CONFIG_SMP=y +# CONFIG_ARC_HAS_GRTC is not set +CONFIG_ARC_UBOOT_SUPPORT=y +CONFIG_ARC_BUILTIN_DTB_NAME="vdk_hs38_smp" +CONFIG_PREEMPT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_IPV6 is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_SLRAM=y +CONFIG_BLK_DEV_RAM=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_NATIONAL_PHY=y +CONFIG_MOUSE_PS2_TOUCHKIT=y +CONFIG_SERIO_ARC_PS2=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_ARCPGU_RGB888=y +CONFIG_ARCPGU_DISPTYPE=0 +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_LOGO_LINUX_CLUT224 is not set +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_SERIAL=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 769b312c1abb..1a80cc91a03b 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,5 +1,4 @@ generic-y += auxvec.h -generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index e2b1b1211b0d..070f58827a5c 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -16,6 +16,8 @@ #define ARC_REG_PERIBASE_BCR 0x69 #define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */ #define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ +#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */ +#define ARC_REG_SLC_BCR 0xce #define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ #define ARC_REG_TIMERS_BCR 0x75 #define ARC_REG_AP_BCR 0x76 @@ -31,6 +33,7 @@ #define ARC_REG_BPU_BCR 0xc0 #define ARC_REG_ISA_CFG_BCR 0xc1 #define ARC_REG_RTT_BCR 0xF2 +#define ARC_REG_IRQ_BCR 0xF3 #define ARC_REG_SMART_BCR 0xFF /* status32 Bits Positions */ @@ -51,6 +54,7 @@ * [15: 8] = Exception Cause Code * [ 7: 0] = Exception Parameters (for certain types only) */ +#ifdef CONFIG_ISA_ARCOMPACT #define ECR_V_MEM_ERR 0x01 #define ECR_V_INSN_ERR 0x02 #define ECR_V_MACH_CHK 0x20 @@ -58,6 +62,15 @@ #define ECR_V_DTLB_MISS 0x22 #define ECR_V_PROTV 0x23 #define ECR_V_TRAP 0x25 +#else +#define ECR_V_MEM_ERR 0x01 +#define ECR_V_INSN_ERR 0x02 +#define ECR_V_MACH_CHK 0x03 +#define ECR_V_ITLB_MISS 0x04 +#define ECR_V_DTLB_MISS 0x05 +#define ECR_V_PROTV 0x06 +#define ECR_V_TRAP 0x09 +#endif /* DTLB Miss and Protection Violation Cause Codes */ @@ -76,9 +89,6 @@ #define ECR_C_BIT_DTLB_LD_MISS 8 #define ECR_C_BIT_DTLB_ST_MISS 9 -/* Dummy ECR values for Interrupts */ -#define event_IRQ1 0x0031abcd -#define event_IRQ2 0x0032abcd /* Auxiliary registers */ #define AUX_IDENTITY 4 @@ -204,9 +214,11 @@ struct bcr_identity { struct bcr_isa { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad1:23, atomic1:1, ver:8; + unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1, + pad1:11, atomic1:1, ver:8; #else - unsigned int ver:8, atomic1:1, pad1:23; + unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1, + ldd:1, pad2:4, div_rem:4; #endif }; @@ -269,11 +281,19 @@ struct bcr_fp_arcompact { #endif }; +struct bcr_fp_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:15, dp:1, pad1:7, sp:1, ver:8; +#else + unsigned int ver:8, sp:1, pad1:7, dp:1, pad2:15; +#endif +}; + struct bcr_timer { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8; + unsigned int pad2:15, rtsc:1, pad1:5, rtc:1, t1:1, t0:1, ver:8; #else - unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15; + unsigned int ver:8, t0:1, t1:1, rtc:1, pad1:5, rtsc:1, pad2:15; #endif }; @@ -285,6 +305,14 @@ struct bcr_bpu_arcompact { #endif }; +struct bcr_bpu_arcv2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:6, fbe:2, tqe:2, ts:4, ft:1, rse:2, pte:3, bce:3, ver:8; +#else + unsigned int ver:8, bce:3, pte:3, rse:2, ft:1, ts:4, tqe:2, fbe:2, pad:6; +#endif +}; + struct bcr_generic { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:24, ver:8; @@ -299,11 +327,12 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6; + unsigned int num_tlb:16, sets:12, ways:4; }; struct cpuinfo_arc_cache { - unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6; + unsigned int sz_k:14, line_len:8, assoc:4, ver:4, alias:1, vipt:1; }; struct cpuinfo_arc_bpu { @@ -315,14 +344,13 @@ struct cpuinfo_arc_ccm { }; struct cpuinfo_arc { - struct cpuinfo_arc_cache icache, dcache; + struct cpuinfo_arc_cache icache, dcache, slc; struct cpuinfo_arc_mmu mmu; struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; struct bcr_timer timers; unsigned int vec_base; - unsigned int uncached_base; struct cpuinfo_arc_ccm iccm, dccm; struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, @@ -336,6 +364,22 @@ struct cpuinfo_arc { extern struct cpuinfo_arc cpuinfo_arc700[]; +static inline int is_isa_arcv2(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCV2); +} + +static inline int is_isa_arcompact(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCOMPACT); +} + +#if defined(CONFIG_ISA_ARCOMPACT) && !defined(_CPU_DEFAULT_A7) +#error "Toolchain not configured for ARCompact builds" +#elif defined(CONFIG_ISA_ARCV2) && !defined(_CPU_DEFAULT_HS) +#error "Toolchain not configured for ARCv2 builds" +#endif + #endif /* __ASEMBLY__ */ #endif /* _ASM_ARC_ARCREGS_H */ diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 9917a45fc430..03484cb4d16d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -23,13 +23,21 @@ #define atomic_set(v, i) (((v)->counter) = (i)) +#ifdef CONFIG_ISA_ARCV2 +#define PREFETCHW " prefetchw [%1] \n" +#else +#define PREFETCHW +#endif + #define ATOMIC_OP(op, c_op, asm_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ unsigned int temp; \ \ __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ + "1: \n" \ + PREFETCHW \ + " llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ " scond %0, [%1] \n" \ " bnz 1b \n" \ @@ -43,8 +51,16 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned int temp; \ \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ + "1: \n" \ + PREFETCHW \ + " llock %0, [%1] \n" \ " " #asm_op " %0, %0, %2 \n" \ " scond %0, [%1] \n" \ " bnz 1b \n" \ @@ -52,6 +68,8 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ : "r"(&v->counter), "ir"(i) \ : "cc"); \ \ + smp_mb(); \ + \ return temp; \ } @@ -105,6 +123,9 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ unsigned long flags; \ unsigned long temp; \ \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ atomic_ops_lock(flags); \ temp = v->counter; \ temp c_op i; \ @@ -142,9 +163,19 @@ ATOMIC_OP(and, &=, and) #define __atomic_add_unless(v, a, u) \ ({ \ int c, old; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ c = atomic_read(v); \ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\ c = old; \ + \ + smp_mb(); \ + \ c; \ }) diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h new file mode 100644 index 000000000000..a7209983ee64 --- /dev/null +++ b/arch/arc/include/asm/barrier.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#ifdef CONFIG_ISA_ARCV2 + +/* + * ARCv2 based HS38 cores are in-order issue, but still weakly ordered + * due to micro-arch buffering/queuing of load/store, cache hit vs. miss ... + * + * Explicit barrier provided by DMB instruction + * - Operand supports fine grained load/store/load+store semantics + * - Ensures that selected memory operation issued before it will complete + * before any subsequent memory operation of same type + * - DMB guarantees SMP as well as local barrier semantics + * (asm-generic/barrier.h ensures sane smp_*mb if not defined here, i.e. + * UP: barrier(), SMP: smp_*mb == *mb) + * - DSYNC provides DMB+completion_of_cache_bpu_maintenance_ops hence not needed + * in the general case. Plus it only provides full barrier. + */ + +#define mb() asm volatile("dmb 3\n" : : : "memory") +#define rmb() asm volatile("dmb 1\n" : : : "memory") +#define wmb() asm volatile("dmb 2\n" : : : "memory") + +#endif + +#ifdef CONFIG_ISA_ARCOMPACT + +/* + * ARCompact based cores (ARC700) only have SYNC instruction which is super + * heavy weight as it flushes the pipeline as well. + * There are no real SMP implementations of such cores. + */ + +#define mb() asm volatile("sync\n" : : : "memory") +#endif + +#include <asm-generic/barrier.h> + +#endif diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4051e9525939..99fe118d3730 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -18,83 +18,50 @@ #include <linux/types.h> #include <linux/compiler.h> #include <asm/barrier.h> +#ifndef CONFIG_ARC_HAS_LLSC +#include <asm/smp.h> +#endif -/* - * Hardware assisted read-modify-write using ARC700 LLOCK/SCOND insns. - * The Kconfig glue ensures that in SMP, this is only set if the container - * SoC/platform has cross-core coherent LLOCK/SCOND - */ #if defined(CONFIG_ARC_HAS_LLSC) -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - /* - * ARC ISA micro-optimization: - * - * Instructions dealing with bitpos only consider lower 5 bits (0-31) - * e.g (x << 33) is handled like (x << 1) by ASL instruction - * (mem pointer still needs adjustment to point to next word) - * - * Hence the masking to clamp @nr arg can be elided in general. - * - * However if @nr is a constant (above assumed it in a register), - * and greater than 31, gcc can optimize away (x << 33) to 0, - * as overflow, given the 32-bit ISA. Thus masking needs to be done - * for constant @nr, but no code is generated due to const prop. - */ - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bset %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bclr %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; +/* + * Hardware assisted Atomic-R-M-W + */ - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " bxor %0, %0, %2 \n" - " scond %0, [%1] \n" - " bnz 1b \n" - : "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned int temp; \ + \ + m += nr >> 5; \ + \ + /* \ + * ARC ISA micro-optimization: \ + * \ + * Instructions dealing with bitpos only consider lower 5 bits \ + * e.g (x << 33) is handled like (x << 1) by ASL instruction \ + * (mem pointer still needs adjustment to point to next word) \ + * \ + * Hence the masking to clamp @nr arg can be elided in general. \ + * \ + * However if @nr is a constant (above assumed in a register), \ + * and greater than 31, gcc can optimize away (x << 33) to 0, \ + * as overflow, given the 32-bit ISA. Thus masking needs to be \ + * done for const @nr, but no code is generated due to gcc \ + * const prop. \ + */ \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " " #asm_op " %0, %0, %2 \n" \ + " scond %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ + : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ + "ir"(nr) \ + : "cc"); \ } /* @@ -108,75 +75,38 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *m) * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally * and the old value of bit is returned */ -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bset %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bclr %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned int old, temp; - - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - __asm__ __volatile__( - "1: llock %0, [%2] \n" - " bxor %1, %0, %3 \n" - " scond %1, [%2] \n" - " bnz 1b \n" - : "=&r"(old), "=&r"(temp) - : "r"(m), "ir"(nr) - : "cc"); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, temp; \ + \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such smenatic \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%2] \n" \ + " " #asm_op " %1, %0, %3 \n" \ + " scond %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(old), "=&r"(temp) \ + : "r"(m), "ir"(nr) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return (old & (1 << nr)) != 0; \ } #else /* !CONFIG_ARC_HAS_LLSC */ -#include <asm/smp.h> - /* * Non hardware assisted Atomic-R-M-W * Locking would change to irq-disabling only (UP) and spinlocks (SMP) @@ -193,108 +123,43 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * at compile time) */ -static inline void set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp | (1UL << nr); - - bitops_unlock(flags); -} - -static inline void clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp & ~(1UL << nr); - - bitops_unlock(flags); -} - -static inline void change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - temp = *m; - *m = temp ^ (1UL << nr); - - bitops_unlock(flags); -} - -static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old | (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; -} - -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old & ~(1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; +#define BIT_OP(op, c_op, asm_op) \ +static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long temp, flags; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + bitops_lock(flags); \ + \ + temp = *m; \ + *m = temp c_op (1UL << nr); \ + \ + bitops_unlock(flags); \ } -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old, flags; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - bitops_lock(flags); - - old = *m; - *m = old ^ (1 << nr); - - bitops_unlock(flags); - - return (old & (1 << nr)) != 0; +#define TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old, flags; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + bitops_lock(flags); \ + \ + old = *m; \ + *m = old c_op (1 << nr); \ + \ + bitops_unlock(flags); \ + \ + return (old & (1 << nr)) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -303,86 +168,51 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *m) * Non atomic variants **************************************/ -static inline void __set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp | (1UL << nr); -} - -static inline void __clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp & ~(1UL << nr); -} - -static inline void __change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long temp; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - temp = *m; - *m = temp ^ (1UL << nr); -} - -static inline int -__test_and_set_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old | (1 << nr); - - return (old & (1 << nr)) != 0; +#define __BIT_OP(op, c_op, asm_op) \ +static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ +{ \ + unsigned long temp; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + temp = *m; \ + *m = temp c_op (1UL << nr); \ } -static inline int -__test_and_clear_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old & ~(1 << nr); - - return (old & (1 << nr)) != 0; +#define __TEST_N_BIT_OP(op, c_op, asm_op) \ +static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ +{ \ + unsigned long old; \ + m += nr >> 5; \ + \ + if (__builtin_constant_p(nr)) \ + nr &= 0x1f; \ + \ + old = *m; \ + *m = old c_op (1 << nr); \ + \ + return (old & (1 << nr)) != 0; \ } -static inline int -__test_and_change_bit(unsigned long nr, volatile unsigned long *m) -{ - unsigned long old; - m += nr >> 5; - - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - old = *m; - *m = old ^ (1 << nr); - - return (old & (1 << nr)) != 0; -} +#define BIT_OPS(op, c_op, asm_op) \ + \ + /* set_bit(), clear_bit(), change_bit() */ \ + BIT_OP(op, c_op, asm_op) \ + \ + /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ + TEST_N_BIT_OP(op, c_op, asm_op) \ + \ + /* __set_bit(), __clear_bit(), __change_bit() */ \ + __BIT_OP(op, c_op, asm_op) \ + \ + /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ + __TEST_N_BIT_OP(op, c_op, asm_op) + +BIT_OPS(set, |, bset) +BIT_OPS(clear, & ~, bclr) +BIT_OPS(change, ^, bxor) /* * This routine doesn't need to be atomic. @@ -402,6 +232,8 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) return ((mask & *addr) != 0); } +#ifdef CONFIG_ISA_ARCOMPACT + /* * Count the number of zeros, starting from MSB * Helper for fls( ) friends @@ -494,6 +326,75 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word) return ffs(word) - 1; } +#else /* CONFIG_ISA_ARCV2 */ + +/* + * fls = Find Last Set in word + * @result: [1-32] + * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 + */ +static inline __attribute__ ((const)) int fls(unsigned long x) +{ + int n; + + asm volatile( + " fls.f %0, %1 \n" /* 0:31; 0(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __fls: Similar to fls, but zero based (0-31). Also 0 if no bit set + */ +static inline __attribute__ ((const)) int __fls(unsigned long x) +{ + /* FLS insn has exactly same semantics as the API */ + return __builtin_arc_fls(x); +} + +/* + * ffs = Find First Set in word (LSB to MSB) + * @result: [1-32], 0 if all 0's + */ +static inline __attribute__ ((const)) int ffs(unsigned long x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " add.nz %0, %0, 1 \n" /* 0:31 -> 1:32 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) /* Early clobber not needed */ + : "r"(x) + : "cc"); + + return n; +} + +/* + * __ffs: Similar to ffs, but zero based (0-31) + */ +static inline __attribute__ ((const)) int __ffs(unsigned long x) +{ + int n; + + asm volatile( + " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ + " mov.z %0, 0 \n" /* 31(Z)-> 0 */ + : "=r"(n) + : "r"(x) + : "cc"); + + return n; + +} + +#endif /* CONFIG_ISA_ARCOMPACT */ + /* * ffz = Find First Zero in word. * @return:[0-31], 32 if all 1's diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 7861255da32d..d67345d3e2d4 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -60,7 +60,7 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_IC_IVIC 0x10 #define ARC_REG_IC_CTRL 0x11 #define ARC_REG_IC_IVIL 0x19 -#if defined(CONFIG_ARC_MMU_V3) +#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4) #define ARC_REG_IC_PTAG 0x1E #endif @@ -74,12 +74,24 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_DC_IVDL 0x4A #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C -#if defined(CONFIG_ARC_MMU_V3) #define ARC_REG_DC_PTAG 0x5C -#endif /* Bit val in DC_CTRL */ #define DC_CTRL_INV_MODE_FLUSH 0x40 #define DC_CTRL_FLUSH_STATUS 0x100 +/*System-level cache (L2 cache) related Auxiliary registers */ +#define ARC_REG_SLC_CFG 0x901 +#define ARC_REG_SLC_CTRL 0x903 +#define ARC_REG_SLC_FLUSH 0x904 +#define ARC_REG_SLC_INVALIDATE 0x905 +#define ARC_REG_SLC_RGN_START 0x914 +#define ARC_REG_SLC_RGN_END 0x916 + +/* Bit val in SLC_CONTROL */ +#define SLC_CTRL_IM 0x040 +#define SLC_CTRL_DISABLE 0x001 +#define SLC_CTRL_BUSY 0x100 +#define SLC_CTRL_RGN_OP_INV 0x200 + #endif /* _ASM_CACHE_H */ diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index 6abc4972bc93..0992d3dbcc65 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -34,9 +34,7 @@ void flush_cache_all(void); void flush_icache_range(unsigned long start, unsigned long end); void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len); void __inv_icache_page(unsigned long paddr, unsigned long vaddr); -void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr); -#define __flush_dcache_page(p, v) \ - ___flush_dcache_page((unsigned long)p, (unsigned long)v) +void __flush_dcache_page(unsigned long paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 03cd6894855d..44fd531f4d7b 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -10,6 +10,8 @@ #define __ASM_ARC_CMPXCHG_H #include <linux/types.h> + +#include <asm/barrier.h> #include <asm/smp.h> #ifdef CONFIG_ARC_HAS_LLSC @@ -19,16 +21,25 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) { unsigned long prev; + /* + * Explicit full memory barrier needed before/after as + * LLOCK/SCOND thmeselves don't provide any such semantics + */ + smp_mb(); + __asm__ __volatile__( "1: llock %0, [%1] \n" " brne %0, %2, 2f \n" " scond %3, [%1] \n" " bnz 1b \n" "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), - "r"(new) /* can't be "ir". scond can't take limm for "b" */ - : "cc"); + : "=&r"(prev) /* Early clobber, to prevent reg reuse */ + : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ + "ir"(expected), + "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ + : "cc", "memory"); /* so that gcc knows memory is being written here */ + + smp_mb(); return prev; } @@ -42,6 +53,9 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) int prev; volatile unsigned long *p = ptr; + /* + * spin lock/unlock provide the needed smp_mb() before/after + */ atomic_ops_lock(flags); prev = *p; if (prev == expected) @@ -77,12 +91,16 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, switch (size) { case 4: + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r"(val) : "r"(ptr) : "memory"); + smp_mb(); + return val; } return __xchg_bad_pointer(); diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 43de30256981..08e7e2a16ac1 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -22,11 +22,10 @@ static inline void __delay(unsigned long loops) { __asm__ __volatile__( - "1: sub.f %0, %0, 1 \n" - " jpnz 1b \n" - : "+r"(loops) - : - : "cc"); + " lp 1f \n" + " nop \n" + "1: \n" + : "+l"(loops)); } extern void __bad_udelay(void); diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h index f787894613ad..2d28ba939d8e 100644 --- a/arch/arc/include/asm/dma-mapping.h +++ b/arch/arc/include/asm/dma-mapping.h @@ -14,23 +14,6 @@ #include <asm-generic/dma-coherent.h> #include <asm/cacheflush.h> -#ifndef CONFIG_ARC_PLAT_NEEDS_CPU_TO_DMA -/* - * dma_map_* API take cpu addresses, which is kernel logical address in the - * untranslated address space (0x8000_0000) based. The dma address (bus addr) - * ideally needs to be 0x0000_0000 based hence these glue routines. - * However given that intermediate bus bridges can ignore the high bit, we can - * do with these routines being no-ops. - * If a platform/device comes up which sriclty requires 0 based bus addr - * (e.g. AHB-PCI bridge on Angel4 board), then it can provide it's own versions - */ -#define plat_dma_addr_to_kernel(dev, addr) ((unsigned long)(addr)) -#define plat_kernel_addr_to_dma(dev, ptr) ((dma_addr_t)(ptr)) - -#else -#include <plat/dma_addr.h> -#endif - void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); @@ -94,7 +77,7 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction dir) { _dma_cache_sync((unsigned long)cpu_addr, size, dir); - return plat_kernel_addr_to_dma(dev, cpu_addr); + return (dma_addr_t)cpu_addr; } static inline void @@ -147,16 +130,14 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size, - DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle), size, - DMA_TO_DEVICE); + _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE); } static inline void @@ -164,8 +145,7 @@ dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset, - size, DMA_FROM_DEVICE); + _dma_cache_sync(dma_handle + offset, size, DMA_FROM_DEVICE); } static inline void @@ -173,8 +153,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction) { - _dma_cache_sync(plat_dma_addr_to_kernel(dev, dma_handle) + offset, - size, DMA_TO_DEVICE); + _dma_cache_sync(dma_handle + offset, size, DMA_TO_DEVICE); } static inline void diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index a26282857683..51a99e25fe33 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -15,6 +15,11 @@ /* These ELF defines belong to uapi but libc elf.h already defines them */ #define EM_ARCOMPACT 93 +#define EM_ARCV2 195 /* ARCv2 Cores */ + +#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \ + EM_ARCOMPACT : EM_ARCV2) + /* ARC Relocations (kernel Modules only) */ #define R_ARC_32 0x4 #define R_ARC_32_ME 0x1B diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h new file mode 100644 index 000000000000..b5ff87e6f4b7 --- /dev/null +++ b/arch/arc/include/asm/entry-arcv2.h @@ -0,0 +1,190 @@ + +#ifndef __ASM_ARC_ENTRY_ARCV2_H +#define __ASM_ARC_ENTRY_ARCV2_H + +#include <asm/asm-offsets.h> +#include <asm/irqflags-arcv2.h> +#include <asm/thread_info.h> /* For THREAD_SIZE */ + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_PROLOGUE called_from + + ; Before jumping to Interrupt Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1, K:0) + ; 3. Auto saved: r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI, PC, STAT32 + ; + ; Now manually save: r12, sp, fp, gp, r25 + + PUSH r12 + + ; Saving pt_regs->sp correctly requires some extra work due to the way + ; Auto stack switch works + ; - U mode: retrieve it from AUX_USER_SP + ; - K mode: add the offset from current SP where H/w starts auto push + ; + ; Utilize the fact that Z bit is set if Intr taken in U mode + mov.nz r9, sp + add.nz r9, r9, SZ_PT_REGS - PT_sp - 4 + bnz 1f + + lr r9, [AUX_USER_SP] +1: + PUSH r9 ; SP + + PUSH fp + PUSH gp + +#ifdef CONFIG_ARC_CURR_IN_REG + PUSH r25 ; user_r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + +.ifnc \called_from, exception + sub sp, sp, 12 ; BTA/ECR/orig_r0 placeholder per pt_regs +.endif + +.endm + +/*------------------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE called_from + +.ifnc \called_from, exception + add sp, sp, 12 ; skip BTA/ECR/orig_r0 placeholderss +.endif + +#ifdef CONFIG_ARC_CURR_IN_REG + POP r25 +#else + add sp, sp, 4 +#endif + + POP gp + POP fp + + ; Don't touch AUX_USER_SP if returning to K mode (Z bit set) + ; (Z bit set on K mode is inverse of INTERRUPT_PROLOGUE) + add.z sp, sp, 4 + bz 1f + + POPAX AUX_USER_SP +1: + POP r12 + +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + ; Before jumping to Exception Vector, hardware micro-ops did following: + ; 1. SP auto-switched to kernel mode stack + ; 2. STATUS32.Z flag set to U mode at time of interrupt (U:1,K:0) + ; + ; Now manually save the complete reg file + + PUSH r9 ; freeup a register: slot of erstatus + + PUSHAX eret + sub sp, sp, 12 ; skip JLI, LDI, EI + PUSH lp_count + PUSHAX lp_start + PUSHAX lp_end + PUSH blink + + PUSH r11 + PUSH r10 + + ld.as r9, [sp, 10] ; load stashed r9 (status32 stack slot) + lr r10, [erstatus] + st.as r10, [sp, 10] ; save status32 at it's right stack slot + + PUSH r9 + PUSH r8 + PUSH r7 + PUSH r6 + PUSH r5 + PUSH r4 + PUSH r3 + PUSH r2 + PUSH r1 + PUSH r0 + + ; -- for interrupts, regs above are auto-saved by h/w in that order -- + ; Now do what ISR prologue does (manually save r12, sp, fp, gp, r25) + ; + ; Set Z flag if this was from U mode (expected by INTERRUPT_PROLOGUE) + ; Although H/w exception micro-ops do set Z flag for U mode (just like + ; for interrupts), it could get clobbered in case we soft land here from + ; a TLB Miss exception handler (tlbex.S) + + and r10, r10, STATUS_U_MASK + xor.f 0, r10, STATUS_U_MASK + + INTERRUPT_PROLOGUE exception + + PUSHAX erbta + PUSHAX ecr ; r9 contains ECR, expected by EV_Trap + + PUSH r0 ; orig_r0 +.endm + +/*------------------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + + ; Assumes r0 has PT_status32 + btst r0, STATUS_U_BIT ; Z flag set if K, used in INTERRUPT_EPILOGUE + + add sp, sp, 8 ; orig_r0/ECR don't need restoring + POPAX erbta + + INTERRUPT_EPILOGUE exception + + POP r0 + POP r1 + POP r2 + POP r3 + POP r4 + POP r5 + POP r6 + POP r7 + POP r8 + POP r9 + POP r10 + POP r11 + + POP blink + POPAX lp_end + POPAX lp_start + + POP r9 + mov lp_count, r9 + + add sp, sp, 12 ; skip JLI, LDI, EI + POPAX eret + POPAX erstatus + + ld.as r9, [sp, -12] ; reload r9 which got clobbered +.endm + +.macro FAKE_RET_FROM_EXCPN + lr r9, [status32] + bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK|STATUS_AE_MASK) + or r9, r9, (STATUS_L_MASK|STATUS_IE_MASK) + kflag r9 +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bmskn \reg, sp, THREAD_SHIFT - 1 +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + xbfu \reg, \reg, 0xE8 /* 00111 01000 */ + /* M = 8-1 N = 8 */ +.endm + +#endif diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h new file mode 100644 index 000000000000..415443c2a8c4 --- /dev/null +++ b/arch/arc/include/asm/entry-compact.h @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * Stack switching code can no longer reliably rely on the fact that + * if we are NOT in user mode, stack is switched to kernel mode. + * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed + * it's prologue including stack switching from user mode + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we also need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context + * + * Vineetg: May 5th 2008 + * -Modified CALLEE_REG save/restore macros to handle the fact that + * r25 contains the kernel current task ptr + * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs + * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the + * address Write back load ld.ab instead of seperate ld/add instn + * + * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 + */ + +#ifndef __ASM_ARC_ENTRY_COMPACT_H +#define __ASM_ARC_ENTRY_COMPACT_H + +#include <asm/asm-offsets.h> +#include <asm/irqflags-compact.h> +#include <asm/thread_info.h> /* For THREAD_SIZE */ + +/*-------------------------------------------------------------- + * Switch to Kernel Mode stack if SP points to User Mode stack + * + * Entry : r9 contains pre-IRQ/exception/trap status32 + * Exit : SP set to K mode stack + * SP at the time of entry (K/U) saved @ pt_regs->sp + * Clobbers: r9 + *-------------------------------------------------------------*/ + +.macro SWITCH_TO_KERNEL_STK + + /* User Mode when this happened ? Yes: Proceed to switch stack */ + bbit1 r9, STATUS_U_BIT, 88f + + /* OK we were already in kernel mode when this event happened, thus can + * assume SP is kernel mode SP. _NO_ need to do any stack switching + */ + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + /* However.... + * If Level 2 Interrupts enabled, we may end up with a corner case: + * 1. User Task executing + * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) + * 3. But before it could switch SP from USER to KERNEL stack + * a L2 IRQ "Interrupts" L1 + * Thay way although L2 IRQ happened in Kernel mode, stack is still + * not switched. + * To handle this, we may need to switch stack even if in kernel mode + * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) + */ + brlo sp, VMALLOC_START, 88f + + /* TODO: vineetg: + * We need to be a bit more cautious here. What if a kernel bug in + * L1 ISR, caused SP to go whaco (some small value which looks like + * USER stk) and then we take L2 ISR. + * Above brlo alone would treat it as a valid L1-L2 sceanrio + * instead of shouting alound + * The only feasible way is to make sure this L2 happened in + * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in + * L1 ISR before it switches stack + */ + +#endif + + /*------Intr/Ecxp happened in kernel mode, SP already setup ------ */ + /* save it nevertheless @ pt_regs->sp for uniformity */ + + b.d 66f + st sp, [sp, PT_sp - SZ_PT_REGS] + +88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ + + GET_CURR_TASK_ON_CPU r9 + + /* With current tsk in r9, get it's kernel mode stack base */ + GET_TSK_STACK_BASE r9, r9 + + /* save U mode SP @ pt_regs->sp */ + st sp, [r9, PT_sp - SZ_PT_REGS] + + /* final SP switch */ + mov sp, r9 +66: +.endm + +/*------------------------------------------------------------ + * "FAKE" a rtie to return from CPU Exception context + * This is to re-enable Exceptions within exception + * Look at EV_ProtV to see how this is actually used + *-------------------------------------------------------------*/ + +.macro FAKE_RET_FROM_EXCPN + + ld r9, [sp, PT_status32] + bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK) + bset r9, r9, STATUS_L_BIT + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] + + rtie +55: +.endm + +/*-------------------------------------------------------------- + * For early Exception/ISR Prologue, a core reg is temporarily needed to + * code the rest of prolog (stack switching). This is done by stashing + * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). + * + * Before saving the full regfile - this reg is restored back, only + * to be saved again on kernel mode stack, as part of pt_regs. + *-------------------------------------------------------------*/ +.macro PROLOG_FREEUP_REG reg, mem +#ifdef CONFIG_SMP + sr \reg, [ARC_REG_SCRATCH_DATA0] +#else + st \reg, [\mem] +#endif +.endm + +.macro PROLOG_RESTORE_REG reg, mem +#ifdef CONFIG_SMP + lr \reg, [ARC_REG_SCRATCH_DATA0] +#else + ld \reg, [\mem] +#endif +.endm + +/*-------------------------------------------------------------- + * Exception Entry prologue + * -Switches stack to K mode (if not already) + * -Saves the register file + * + * After this it is safe to call the "C" handlers + *-------------------------------------------------------------*/ +.macro EXCEPTION_PROLOGUE + + /* Need at least 1 reg to code the early exception prologue */ + PROLOG_FREEUP_REG r9, @ex_saved_reg1 + + /* U/K mode at time of exception (stack not switched if already K) */ + lr r9, [erstatus] + + /* ARC700 doesn't provide auto-stack switching */ + SWITCH_TO_KERNEL_STK + +#ifdef CONFIG_ARC_CURR_IN_REG + /* Treat r25 as scratch reg (save on stack) and load with "current" */ + PUSH r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + + st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */ + sub sp, sp, 4 /* skip pt_regs->sp, already saved above */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @ex_saved_reg1 + + /* now we are ready to save the regfile */ + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSHAX eret + PUSHAX erstatus + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX erbta + + lr r9, [ecr] + st r9, [sp, PT_event] /* EV_Trap expects r9 to have ECR */ +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by system call or Exceptions + * SP should always be pointing to the next free stack element + * when entering this macro. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro EXCEPTION_EPILOGUE + POPAX erbta + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX erstatus + POPAX eret + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Dummy ECR values for Interrupts */ +#define event_IRQ1 0x0031abcd +#define event_IRQ2 0x0032abcd + +.macro INTERRUPT_PROLOGUE LVL + + /* free up r9 as scratchpad */ + PROLOG_FREEUP_REG r9, @int\LVL\()_saved_reg + + /* Which mode (user/kernel) was the system in when intr occured */ + lr r9, [status32_l\LVL\()] + + SWITCH_TO_KERNEL_STK + +#ifdef CONFIG_ARC_CURR_IN_REG + /* Treat r25 as scratch reg (save on stack) and load with "current" */ + PUSH r25 + GET_CURR_TASK_ON_CPU r25 +#else + sub sp, sp, 4 +#endif + + PUSH 0x003\LVL\()abcd /* Dummy ECR */ + sub sp, sp, 8 /* skip orig_r0 (not needed) + skip pt_regs->sp, already saved above */ + + /* Restore r9 used to code the early prologue */ + PROLOG_RESTORE_REG r9, @int\LVL\()_saved_reg + + SAVE_R0_TO_R12 + PUSH gp + PUSH fp + PUSH blink + PUSH ilink\LVL\() + PUSHAX status32_l\LVL\() + PUSH lp_count + PUSHAX lp_end + PUSHAX lp_start + PUSHAX bta_l\LVL\() +.endm + +/*-------------------------------------------------------------- + * Restore all registers used by interrupt handlers. + * + * NOTE: + * + * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg + * for memory load operations. If used in that way interrupts are deffered + * by hardware and that is not good. + *-------------------------------------------------------------*/ +.macro INTERRUPT_EPILOGUE LVL + POPAX bta_l\LVL\() + POPAX lp_start + POPAX lp_end + + POP r9 + mov lp_count, r9 ;LD to lp_count is not allowed + + POPAX status32_l\LVL\() + POP ilink\LVL\() + POP blink + POP fp + POP gp + RESTORE_R12_TO_R0 + + ld sp, [sp] /* restore original sp */ + /* orig_r0, ECR, user_r25 skipped automatically */ +.endm + +/* Get thread_info of "current" tsk */ +.macro GET_CURR_THR_INFO_FROM_SP reg + bic \reg, sp, (THREAD_SIZE - 1) +.endm + +/* Get CPU-ID of this core */ +.macro GET_CPU_ID reg + lr \reg, [identity] + lsr \reg, \reg, 8 + bmsk \reg, \reg, 7 +.endm + +#endif /* __ASM_ARC_ENTRY_COMPACT_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 884081099f80..ad7860c5ce15 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -1,45 +1,27 @@ /* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * Vineetg: March 2009 (Supporting 2 levels of Interrupts) - * Stack switching code can no longer reliably rely on the fact that - * if we are NOT in user mode, stack is switched to kernel mode. - * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed - * it's prologue including stack switching from user mode - * - * Vineetg: Aug 28th 2008: Bug #94984 - * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap - * Normally CPU does this automatically, however when doing FAKE rtie, - * we also need to explicitly do this. The problem in macros - * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit - * was being "CLEARED" rather then "SET". Actually "SET" clears ZOL context - * - * Vineetg: May 5th 2008 - * -Modified CALLEE_REG save/restore macros to handle the fact that - * r25 contains the kernel current task ptr - * - Defined Stack Switching Macro to be reused in all intr/excp hdlrs - * - Shaved off 11 instructions from RESTORE_ALL_INT1 by using the - * address Write back load ld.ab instead of seperate ld/add instn - * - * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ #ifndef __ASM_ARC_ENTRY_H #define __ASM_ARC_ENTRY_H -#ifdef __ASSEMBLY__ #include <asm/unistd.h> /* For NR_syscalls defination */ -#include <asm/asm-offsets.h> #include <asm/arcregs.h> #include <asm/ptrace.h> #include <asm/processor.h> /* For VMALLOC_START */ -#include <asm/thread_info.h> /* For THREAD_SIZE */ #include <asm/mmu.h> +#ifdef CONFIG_ISA_ARCOMPACT +#include <asm/entry-compact.h> /* ISA specific bits */ +#else +#include <asm/entry-arcv2.h> +#endif + /* Note on the LD/ST addr modes with addr reg wback * * LD.a same as LD.aw @@ -143,8 +125,6 @@ POP r13 .endm -#define OFF_USER_R25_FROM_R24 (SZ_CALLEE_REGS + SZ_PT_REGS - 8)/4 - /*-------------------------------------------------------------- * Collect User Mode callee regs as struct callee_regs - needed by * fork/do_signal/unaligned-access-emulation. @@ -157,12 +137,13 @@ *-------------------------------------------------------------*/ .macro SAVE_CALLEE_SAVED_USER + mov r12, sp ; save SP as ref to pt_regs SAVE_R13_TO_R24 #ifdef CONFIG_ARC_CURR_IN_REG - ; Retrieve orig r25 and save it on stack - ld.as r12, [sp, OFF_USER_R25_FROM_R24] - st.a r12, [sp, -4] + ; Retrieve orig r25 and save it with rest of callee_regs + ld.as r12, [r12, PT_user_r25] + PUSH r12 #else PUSH r25 #endif @@ -209,12 +190,16 @@ .macro RESTORE_CALLEE_SAVED_USER #ifdef CONFIG_ARC_CURR_IN_REG - ld.ab r12, [sp, 4] - st.as r12, [sp, OFF_USER_R25_FROM_R24] + POP r12 #else POP r25 #endif RESTORE_R24_TO_R13 + + ; SP is back to start of pt_regs +#ifdef CONFIG_ARC_CURR_IN_REG + st.as r12, [sp, PT_user_r25] +#endif .endm /*-------------------------------------------------------------- @@ -240,117 +225,6 @@ .endm -/*-------------------------------------------------------------- - * Switch to Kernel Mode stack if SP points to User Mode stack - * - * Entry : r9 contains pre-IRQ/exception/trap status32 - * Exit : SP is set to kernel mode stack pointer - * If CURR_IN_REG, r25 set to "current" task pointer - * Clobbers: r9 - *-------------------------------------------------------------*/ - -.macro SWITCH_TO_KERNEL_STK - - /* User Mode when this happened ? Yes: Proceed to switch stack */ - bbit1 r9, STATUS_U_BIT, 88f - - /* OK we were already in kernel mode when this event happened, thus can - * assume SP is kernel mode SP. _NO_ need to do any stack switching - */ - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - /* However.... - * If Level 2 Interrupts enabled, we may end up with a corner case: - * 1. User Task executing - * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) - * 3. But before it could switch SP from USER to KERNEL stack - * a L2 IRQ "Interrupts" L1 - * Thay way although L2 IRQ happened in Kernel mode, stack is still - * not switched. - * To handle this, we may need to switch stack even if in kernel mode - * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) - */ - brlo sp, VMALLOC_START, 88f - - /* TODO: vineetg: - * We need to be a bit more cautious here. What if a kernel bug in - * L1 ISR, caused SP to go whaco (some small value which looks like - * USER stk) and then we take L2 ISR. - * Above brlo alone would treat it as a valid L1-L2 sceanrio - * instead of shouting alound - * The only feasible way is to make sure this L2 happened in - * L1 prelogue ONLY i.e. ilink2 is less than a pre-set marker in - * L1 ISR before it switches stack - */ - -#endif - - /* Save Pre Intr/Exception KERNEL MODE SP on kernel stack - * safe-keeping not really needed, but it keeps the epilogue code - * (SP restore) simpler/uniform. - */ - b.d 66f - mov r9, sp - -88: /*------Intr/Ecxp happened in user mode, "switch" stack ------ */ - - GET_CURR_TASK_ON_CPU r9 - - /* With current tsk in r9, get it's kernel mode stack base */ - GET_TSK_STACK_BASE r9, r9 - -66: -#ifdef CONFIG_ARC_CURR_IN_REG - /* - * Treat r25 as scratch reg, save it on stack first - * Load it with current task pointer - */ - st r25, [r9, -4] - GET_CURR_TASK_ON_CPU r25 -#endif - - /* Save Pre Intr/Exception User SP on kernel stack */ - st.a sp, [r9, -16] ; Make room for orig_r0, ECR, user_r25 - - /* CAUTION: - * SP should be set at the very end when we are done with everything - * In case of 2 levels of interrupt we depend on value of SP to assume - * that everything else is done (loading r25 etc) - */ - - /* set SP to point to kernel mode stack */ - mov sp, r9 - - /* ----- Stack Switched to kernel Mode, Now save REG FILE ----- */ - -.endm - -/*------------------------------------------------------------ - * "FAKE" a rtie to return from CPU Exception context - * This is to re-enable Exceptions within exception - * Look at EV_ProtV to see how this is actually used - *-------------------------------------------------------------*/ - -.macro FAKE_RET_FROM_EXCPN reg - - ld \reg, [sp, PT_status32] - bic \reg, \reg, (STATUS_U_MASK|STATUS_DE_MASK) - bset \reg, \reg, STATUS_L_BIT - sr \reg, [erstatus] - mov \reg, 55f - sr \reg, [eret] - - rtie -55: -.endm - -/* - * @reg [OUT] &thread_info of "current" - */ -.macro GET_CURR_THR_INFO_FROM_SP reg - bic \reg, sp, (THREAD_SIZE - 1) -.endm - /* * @reg [OUT] thread_info->flags of "current" */ @@ -359,222 +233,6 @@ ld \reg, [\reg, THREAD_INFO_FLAGS] .endm -/*-------------------------------------------------------------- - * For early Exception Prologue, a core reg is temporarily needed to - * code the rest of prolog (stack switching). This is done by stashing - * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP). - * - * Before saving the full regfile - this reg is restored back, only - * to be saved again on kernel mode stack, as part of pt_regs. - *-------------------------------------------------------------*/ -.macro EXCPN_PROLOG_FREEUP_REG reg -#ifdef CONFIG_SMP - sr \reg, [ARC_REG_SCRATCH_DATA0] -#else - st \reg, [@ex_saved_reg1] -#endif -.endm - -.macro EXCPN_PROLOG_RESTORE_REG reg -#ifdef CONFIG_SMP - lr \reg, [ARC_REG_SCRATCH_DATA0] -#else - ld \reg, [@ex_saved_reg1] -#endif -.endm - -/*-------------------------------------------------------------- - * Exception Entry prologue - * -Switches stack to K mode (if not already) - * -Saves the register file - * - * After this it is safe to call the "C" handlers - *-------------------------------------------------------------*/ -.macro EXCEPTION_PROLOGUE - - /* Need at least 1 reg to code the early exception prologue */ - EXCPN_PROLOG_FREEUP_REG r9 - - /* U/K mode at time of exception (stack not switched if already K) */ - lr r9, [erstatus] - - /* ARC700 doesn't provide auto-stack switching */ - SWITCH_TO_KERNEL_STK - - /* save the regfile */ - SAVE_ALL_SYS -.endm - -/*-------------------------------------------------------------- - * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc) - * Requires SP to be already switched to kernel mode Stack - * sp points to the next free element on the stack at exit of this macro. - * Registers are pushed / popped in the order defined in struct ptregs - * in asm/ptrace.h - * Note that syscalls are implemented via TRAP which is also a exception - * from CPU's point of view - *-------------------------------------------------------------*/ -.macro SAVE_ALL_SYS - - lr r9, [ecr] - st r9, [sp, 8] /* ECR */ - st r0, [sp, 4] /* orig_r0, needed only for sys calls */ - - /* Restore r9 used to code the early prologue */ - EXCPN_PROLOG_RESTORE_REG r9 - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSHAX eret - PUSHAX erstatus - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX erbta -.endm - -/*-------------------------------------------------------------- - * Restore all registers used by system call or Exceptions - * SP should always be pointing to the next free stack element - * when entering this macro. - * - * NOTE: - * - * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered - * by hardware and that is not good. - *-------------------------------------------------------------*/ -.macro RESTORE_ALL_SYS - POPAX erbta - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX erstatus - POPAX eret - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - - -/*-------------------------------------------------------------- - * Save all registers used by interrupt handlers. - *-------------------------------------------------------------*/ -.macro SAVE_ALL_INT1 - - /* restore original r9 to be saved as part of reg-file */ -#ifdef CONFIG_SMP - lr r9, [ARC_REG_SCRATCH_DATA0] -#else - ld r9, [@int1_saved_reg] -#endif - - /* now we are ready to save the remaining context :) */ - st event_IRQ1, [sp, 8] /* Dummy ECR */ - st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSH ilink1 - PUSHAX status32_l1 - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX bta_l1 -.endm - -.macro SAVE_ALL_INT2 - - /* TODO-vineetg: SMP we can't use global nor can we use - * SCRATCH0 as we do for int1 because while int1 is using - * it, int2 can come - */ - /* retsore original r9 , saved in sys_saved_r9 */ - ld r9, [@int2_saved_reg] - - /* now we are ready to save the remaining context :) */ - st event_IRQ2, [sp, 8] /* Dummy ECR */ - st 0, [sp, 4] /* orig_r0 , N/A for IRQ */ - - SAVE_R0_TO_R12 - PUSH gp - PUSH fp - PUSH blink - PUSH ilink2 - PUSHAX status32_l2 - PUSH lp_count - PUSHAX lp_end - PUSHAX lp_start - PUSHAX bta_l2 -.endm - -/*-------------------------------------------------------------- - * Restore all registers used by interrupt handlers. - * - * NOTE: - * - * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered - * by hardware and that is not good. - *-------------------------------------------------------------*/ - -.macro RESTORE_ALL_INT1 - POPAX bta_l1 - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX status32_l1 - POP ilink1 - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - -.macro RESTORE_ALL_INT2 - POPAX bta_l2 - POPAX lp_start - POPAX lp_end - - POP r9 - mov lp_count, r9 ;LD to lp_count is not allowed - - POPAX status32_l2 - POP ilink2 - POP blink - POP fp - POP gp - RESTORE_R12_TO_R0 - - ld sp, [sp] /* restore original sp */ - /* orig_r0, ECR, user_r25 skipped automatically */ -.endm - - -/* Get CPU-ID of this core */ -.macro GET_CPU_ID reg - lr \reg, [identity] - lsr \reg, \reg, 8 - bmsk \reg, \reg, 7 -.endm - #ifdef CONFIG_SMP /*------------------------------------------------- @@ -643,6 +301,4 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#endif /* __ASSEMBLY__ */ - #endif /* __ASM_ARC_ENTRY_H */ diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index 7cc4ced5dbf4..694ece8a0243 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -99,9 +99,45 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr) } -#define readb_relaxed readb -#define readw_relaxed readw -#define readl_relaxed readl +#ifdef CONFIG_ISA_ARCV2 +#include <asm/barrier.h> +#define __iormb() rmb() +#define __iowmb() wmb() +#else +#define __iormb() do { } while (0) +#define __iowmb() do { } while (0) +#endif + +/* + * MMIO can also get buffered/optimized in micro-arch, so barriers needed + * Based on ARM model for the typical use case + * + * <ST [DMA buffer]> + * <writel MMIO "go" reg> + * or: + * <readl MMIO "status" reg> + * <LD [DMA buffer]> + * + * http://lkml.kernel.org/r/20150622133656.GG1583@arm.com + */ +#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; }) + +#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) +#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) +#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) + +/* + * Relaxed API for drivers which can handle any ordering themselves + */ +#define readb_relaxed(c) __raw_readb(c) +#define readw_relaxed(c) __raw_readw(c) +#define readl_relaxed(c) __raw_readl(c) + +#define writeb_relaxed(v,c) __raw_writeb(v,c) +#define writew_relaxed(v,c) __raw_writew(v,c) +#define writel_relaxed(v,c) __raw_writel(v,c) #include <asm-generic/io.h> diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index f38652fb2ed7..bc5103637326 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -13,8 +13,14 @@ #define NR_IRQS 128 /* allow some CPU external IRQ handling */ /* Platform Independent IRQs */ +#ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 +#else +#define TIMER0_IRQ 16 +#define TIMER1_IRQ 17 +#define IPI_IRQ 19 +#endif #include <linux/interrupt.h> #include <asm-generic/irq.h> diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h new file mode 100644 index 000000000000..ad481c24070d --- /dev/null +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_IRQFLAGS_ARCV2_H +#define __ASM_IRQFLAGS_ARCV2_H + +#include <asm/arcregs.h> + +/* status32 Bits */ +#define STATUS_AD_BIT 19 /* Disable Align chk: core supports non-aligned */ +#define STATUS_IE_BIT 31 + +#define STATUS_AD_MASK (1<<STATUS_AD_BIT) +#define STATUS_IE_MASK (1<<STATUS_IE_BIT) + +#define AUX_USER_SP 0x00D +#define AUX_IRQ_CTRL 0x00E +#define AUX_IRQ_ACT 0x043 /* Active Intr across all levels */ +#define AUX_IRQ_LVL_PEND 0x200 /* Pending Intr across all levels */ +#define AUX_IRQ_PRIORITY 0x206 +#define ICAUSE 0x40a +#define AUX_IRQ_SELECT 0x40b +#define AUX_IRQ_ENABLE 0x40c + +/* Was Intr taken in User Mode */ +#define AUX_IRQ_ACT_BIT_U 31 + +/* 0 is highest level, but taken by FIRQs, if present in design */ +#define ARCV2_IRQ_DEF_PRIO 0 + +/* seed value for status register */ +#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \ + (ARCV2_IRQ_DEF_PRIO << 1)) + +#ifndef __ASSEMBLY__ + +/* + * Save IRQ state and disable IRQs + */ +static inline long arch_local_irq_save(void) +{ + unsigned long flags; + + __asm__ __volatile__(" clri %0 \n" : "=r" (flags) : : "memory"); + + return flags; +} + +/* + * restore saved IRQ state + */ +static inline void arch_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__(" seti %0 \n" : : "r" (flags) : "memory"); +} + +/* + * Unconditionally Enable IRQs + */ +static inline void arch_local_irq_enable(void) +{ + unsigned int irqact = read_aux_reg(AUX_IRQ_ACT); + + if (irqact & 0xffff) + write_aux_reg(AUX_IRQ_ACT, irqact & ~0xffff); + + __asm__ __volatile__(" seti \n" : : : "memory"); +} + +/* + * Unconditionally Disable IRQs + */ +static inline void arch_local_irq_disable(void) +{ + __asm__ __volatile__(" clri \n" : : : "memory"); +} + +/* + * save IRQ state + */ +static inline long arch_local_save_flags(void) +{ + unsigned long temp; + + __asm__ __volatile__( + " lr %0, [status32] \n" + : "=&r"(temp) + : + : "memory"); + + return temp; +} + +/* + * Query IRQ state + */ +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (STATUS_IE_MASK)); +} + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#else + +.macro IRQ_DISABLE scratch + clri +.endm + +.macro IRQ_ENABLE scratch + seti +.endm + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h new file mode 100644 index 000000000000..aa805575c320 --- /dev/null +++ b/arch/arc/include/asm/irqflags-compact.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_IRQFLAGS_ARCOMPACT_H +#define __ASM_IRQFLAGS_ARCOMPACT_H + +/* vineetg: March 2010 : local_irq_save( ) optimisation + * -Remove explicit mov of current status32 into reg, that is not needed + * -Use BIC insn instead of INVERTED + AND + * -Conditionally disable interrupts (if they are not enabled, don't disable) +*/ + +#include <asm/arcregs.h> + +/* status32 Reg bits related to Interrupt Handling */ +#define STATUS_E1_BIT 1 /* Int 1 enable */ +#define STATUS_E2_BIT 2 /* Int 2 enable */ +#define STATUS_A1_BIT 3 /* Int 1 active */ +#define STATUS_A2_BIT 4 /* Int 2 active */ + +#define STATUS_E1_MASK (1<<STATUS_E1_BIT) +#define STATUS_E2_MASK (1<<STATUS_E2_BIT) +#define STATUS_A1_MASK (1<<STATUS_A1_BIT) +#define STATUS_A2_MASK (1<<STATUS_A2_BIT) +#define STATUS_IE_MASK (STATUS_E1_MASK | STATUS_E2_MASK) + +/* Other Interrupt Handling related Aux regs */ +#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */ +#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */ +#define AUX_IRQ_LV12 0x43 /* interrupt level register */ + +#define AUX_IENABLE 0x40c +#define AUX_ITRIGGER 0x40d +#define AUX_IPULSE 0x415 + +#define ISA_INIT_STATUS_BITS STATUS_IE_MASK + +#ifndef __ASSEMBLY__ + +/****************************************************************** + * IRQ Control Macros + * + * All of them have "memory" clobber (compiler barrier) which is needed to + * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available) + * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register) + * + * Noted at the time of Abilis Timer List corruption + * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67 + * Reasoning : https://lkml.org/lkml/2013/4/8/15 + * + ******************************************************************/ + +/* + * Save IRQ state and disable IRQs + */ +static inline long arch_local_irq_save(void) +{ + unsigned long temp, flags; + + __asm__ __volatile__( + " lr %1, [status32] \n" + " bic %0, %1, %2 \n" + " and.f 0, %1, %2 \n" + " flag.nz %0 \n" + : "=r"(temp), "=r"(flags) + : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) + : "memory", "cc"); + + return flags; +} + +/* + * restore saved IRQ state + */ +static inline void arch_local_irq_restore(unsigned long flags) +{ + + __asm__ __volatile__( + " flag %0 \n" + : + : "r"(flags) + : "memory"); +} + +/* + * Unconditionally Enable IRQs + */ +extern void arch_local_irq_enable(void); + +/* + * Unconditionally Disable IRQs + */ +static inline void arch_local_irq_disable(void) +{ + unsigned long temp; + + __asm__ __volatile__( + " lr %0, [status32] \n" + " and %0, %0, %1 \n" + " flag %0 \n" + : "=&r"(temp) + : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK)) + : "memory"); +} + +/* + * save IRQ state + */ +static inline long arch_local_save_flags(void) +{ + unsigned long temp; + + __asm__ __volatile__( + " lr %0, [status32] \n" + : "=&r"(temp) + : + : "memory"); + + return temp; +} + +/* + * Query IRQ state + */ +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (STATUS_E1_MASK +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + | STATUS_E2_MASK +#endif + )); +} + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#else + +#ifdef CONFIG_TRACE_IRQFLAGS + +.macro TRACE_ASM_IRQ_DISABLE + bl trace_hardirqs_off +.endm + +.macro TRACE_ASM_IRQ_ENABLE + bl trace_hardirqs_on +.endm + +#else + +.macro TRACE_ASM_IRQ_DISABLE +.endm + +.macro TRACE_ASM_IRQ_ENABLE +.endm + +#endif + +.macro IRQ_DISABLE scratch + lr \scratch, [status32] + bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) + flag \scratch + TRACE_ASM_IRQ_DISABLE +.endm + +.macro IRQ_ENABLE scratch + lr \scratch, [status32] + or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) + flag \scratch + TRACE_ASM_IRQ_ENABLE +.endm + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index 27ecc6975a58..59bc6a64f75d 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify @@ -9,171 +10,10 @@ #ifndef __ASM_ARC_IRQFLAGS_H #define __ASM_ARC_IRQFLAGS_H -/* vineetg: March 2010 : local_irq_save( ) optimisation - * -Remove explicit mov of current status32 into reg, that is not needed - * -Use BIC insn instead of INVERTED + AND - * -Conditionally disable interrupts (if they are not enabled, don't disable) -*/ - -#include <asm/arcregs.h> - -/* status32 Reg bits related to Interrupt Handling */ -#define STATUS_E1_BIT 1 /* Int 1 enable */ -#define STATUS_E2_BIT 2 /* Int 2 enable */ -#define STATUS_A1_BIT 3 /* Int 1 active */ -#define STATUS_A2_BIT 4 /* Int 2 active */ - -#define STATUS_E1_MASK (1<<STATUS_E1_BIT) -#define STATUS_E2_MASK (1<<STATUS_E2_BIT) -#define STATUS_A1_MASK (1<<STATUS_A1_BIT) -#define STATUS_A2_MASK (1<<STATUS_A2_BIT) - -/* Other Interrupt Handling related Aux regs */ -#define AUX_IRQ_LEV 0x200 /* IRQ Priority: L1 or L2 */ -#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */ -#define AUX_IRQ_LV12 0x43 /* interrupt level register */ - -#define AUX_IENABLE 0x40c -#define AUX_ITRIGGER 0x40d -#define AUX_IPULSE 0x415 - -#ifndef __ASSEMBLY__ - -/****************************************************************** - * IRQ Control Macros - * - * All of them have "memory" clobber (compiler barrier) which is needed to - * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available) - * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register) - * - * Noted at the time of Abilis Timer List corruption - * Orig Bug + Rejected solution : https://lkml.org/lkml/2013/3/29/67 - * Reasoning : https://lkml.org/lkml/2013/4/8/15 - * - ******************************************************************/ - -/* - * Save IRQ state and disable IRQs - */ -static inline long arch_local_irq_save(void) -{ - unsigned long temp, flags; - - __asm__ __volatile__( - " lr %1, [status32] \n" - " bic %0, %1, %2 \n" - " and.f 0, %1, %2 \n" - " flag.nz %0 \n" - : "=r"(temp), "=r"(flags) - : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) - : "memory", "cc"); - - return flags; -} - -/* - * restore saved IRQ state - */ -static inline void arch_local_irq_restore(unsigned long flags) -{ - - __asm__ __volatile__( - " flag %0 \n" - : - : "r"(flags) - : "memory"); -} - -/* - * Unconditionally Enable IRQs - */ -extern void arch_local_irq_enable(void); - -/* - * Unconditionally Disable IRQs - */ -static inline void arch_local_irq_disable(void) -{ - unsigned long temp; - - __asm__ __volatile__( - " lr %0, [status32] \n" - " and %0, %0, %1 \n" - " flag %0 \n" - : "=&r"(temp) - : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK)) - : "memory"); -} - -/* - * save IRQ state - */ -static inline long arch_local_save_flags(void) -{ - unsigned long temp; - - __asm__ __volatile__( - " lr %0, [status32] \n" - : "=&r"(temp) - : - : "memory"); - - return temp; -} - -/* - * Query IRQ state - */ -static inline int arch_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & (STATUS_E1_MASK -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - | STATUS_E2_MASK -#endif - )); -} - -static inline int arch_irqs_disabled(void) -{ - return arch_irqs_disabled_flags(arch_local_save_flags()); -} - -#else - -#ifdef CONFIG_TRACE_IRQFLAGS - -.macro TRACE_ASM_IRQ_DISABLE - bl trace_hardirqs_off -.endm - -.macro TRACE_ASM_IRQ_ENABLE - bl trace_hardirqs_on -.endm - +#ifdef CONFIG_ISA_ARCOMPACT +#include <asm/irqflags-compact.h> #else - -.macro TRACE_ASM_IRQ_DISABLE -.endm - -.macro TRACE_ASM_IRQ_ENABLE -.endm - +#include <asm/irqflags-arcv2.h> #endif -.macro IRQ_DISABLE scratch - lr \scratch, [status32] - bic \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) - flag \scratch - TRACE_ASM_IRQ_DISABLE -.endm - -.macro IRQ_ENABLE scratch - lr \scratch, [status32] - or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) - flag \scratch - TRACE_ASM_IRQ_ENABLE -.endm - -#endif /* __ASSEMBLY__ */ - #endif diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h new file mode 100644 index 000000000000..52c11f0bb0e5 --- /dev/null +++ b/arch/arc/include/asm/mcip.h @@ -0,0 +1,94 @@ +/* + * ARConnect IP Support (Multi core enabler: Cross core IPI, RTC ...) + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_MCIP_H +#define __ASM_MCIP_H + +#ifdef CONFIG_ISA_ARCV2 + +#include <asm/arcregs.h> + +#define ARC_REG_MCIP_BCR 0x0d0 +#define ARC_REG_MCIP_CMD 0x600 +#define ARC_REG_MCIP_WDATA 0x601 +#define ARC_REG_MCIP_READBACK 0x602 + +struct mcip_cmd { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:8, param:16, cmd:8; +#else + unsigned int cmd:8, param:16, pad:8; +#endif + +#define CMD_INTRPT_GENERATE_IRQ 0x01 +#define CMD_INTRPT_GENERATE_ACK 0x02 +#define CMD_INTRPT_READ_STATUS 0x03 +#define CMD_INTRPT_CHECK_SOURCE 0x04 + +/* Semaphore Commands */ +#define CMD_SEMA_CLAIM_AND_READ 0x11 +#define CMD_SEMA_RELEASE 0x12 + +#define CMD_DEBUG_SET_MASK 0x34 +#define CMD_DEBUG_SET_SELECT 0x36 + +#define CMD_GRTC_READ_LO 0x42 +#define CMD_GRTC_READ_HI 0x43 + +#define CMD_IDU_ENABLE 0x71 +#define CMD_IDU_DISABLE 0x72 +#define CMD_IDU_SET_MODE 0x74 +#define CMD_IDU_SET_DEST 0x76 +#define CMD_IDU_SET_MASK 0x7C + +#define IDU_M_TRIG_LEVEL 0x0 +#define IDU_M_TRIG_EDGE 0x1 + +#define IDU_M_DISTRI_RR 0x0 +#define IDU_M_DISTRI_DEST 0x2 +}; + +/* + * MCIP programming model + * + * - Simple commands write {cmd:8,param:16} to MCIP_CMD aux reg + * (param could be irq, common_irq, core_id ...) + * - More involved commands setup MCIP_WDATA with cmd specific data + * before invoking the simple command + */ +static inline void __mcip_cmd(unsigned int cmd, unsigned int param) +{ + struct mcip_cmd buf; + + buf.pad = 0; + buf.cmd = cmd; + buf.param = param; + + WRITE_AUX(ARC_REG_MCIP_CMD, buf); +} + +/* + * Setup additional data for a cmd + * Callers need to lock to ensure atomicity + */ +static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, + unsigned int data) +{ + write_aux_reg(ARC_REG_MCIP_WDATA, data); + + __mcip_cmd(cmd, param); +} + +extern void mcip_init_early_smp(void); +extern void mcip_init_smp(unsigned int cpu); + +#endif + +#endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 8c84ae98c337..0f9c3eb5327e 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -15,24 +15,41 @@ #define CONFIG_ARC_MMU_VER 2 #elif defined(CONFIG_ARC_MMU_V3) #define CONFIG_ARC_MMU_VER 3 +#elif defined(CONFIG_ARC_MMU_V4) +#define CONFIG_ARC_MMU_VER 4 #endif /* MMU Management regs */ #define ARC_REG_MMU_BCR 0x06f +#if (CONFIG_ARC_MMU_VER < 4) #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 #define ARC_REG_TLBINDEX 0x407 #define ARC_REG_TLBCOMMAND 0x408 #define ARC_REG_PID 0x409 #define ARC_REG_SCRATCH_DATA0 0x418 +#else +#define ARC_REG_TLBPD0 0x460 +#define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBINDEX 0x464 +#define ARC_REG_TLBCOMMAND 0x465 +#define ARC_REG_PID 0x468 +#define ARC_REG_SCRATCH_DATA0 0x46c +#endif /* Bits in MMU PID register */ -#define MMU_ENABLE (1 << 31) /* Enable MMU for process */ +#define __TLB_ENABLE (1 << 31) +#define __PROG_ENABLE (1 << 30) +#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) /* Error code if probe fails */ #define TLB_LKUP_ERR 0x80000000 +#if (CONFIG_ARC_MMU_VER < 4) #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) +#else +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) +#endif /* TLB Commands */ #define TLBWrite 0x1 @@ -45,6 +62,11 @@ #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ #endif +#if (CONFIG_ARC_MMU_VER >= 4) +#define TLBInsertEntry 0x7 +#define TLBDeleteEntry 0x8 +#endif + #ifndef __ASSEMBLY__ typedef struct { diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 9615fe1701c6..1281718802f7 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -72,8 +72,18 @@ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ #define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ + +#if (CONFIG_ARC_MMU_VER >= 4) +#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ +#endif + #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ + +#if (CONFIG_ARC_MMU_VER >= 4) +#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */ +#endif + #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ #endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 52312cb5dbe2..ee682d8e0213 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -77,7 +77,7 @@ struct task_struct; */ #define TSK_K_ESP(tsk) (tsk->thread.ksp) -#define TSK_K_REG(tsk, off) (*((unsigned int *)(TSK_K_ESP(tsk) + \ +#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \ sizeof(struct callee_regs) + off))) #define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) @@ -100,29 +100,26 @@ extern unsigned int get_wchan(struct task_struct *p); #endif /* !__ASSEMBLY__ */ -/* Kernels Virtual memory area. - * Unlike other architectures(MIPS, sh, cris ) ARC 700 does not have a - * "kernel translated" region (like KSEG2 in MIPS). So we use a upper part - * of the translated bottom 2GB for kernel virtual memory and protect - * these pages from user accesses by disabling Ru, Eu and Wu. +/* + * System Memory Map on ARC + * + * ---------------------------- (lower 2G, Translated) ------------------------- + * 0x0000_0000 0x5FFF_FFFF (user vaddr: TASK_SIZE) + * 0x6000_0000 0x6FFF_FFFF (reserved gutter between U/K) + * 0x7000_0000 0x7FFF_FFFF (kvaddr: vmalloc/modules/pkmap..) + * + * PAGE_OFFSET ---------------- (Upper 2G, Untranslated) ----------------------- + * 0x8000_0000 0xBFFF_FFFF (kernel direct mapped) + * 0xC000_0000 0xFFFF_FFFF (peripheral uncached space) + * ----------------------------------------------------------------------------- */ -#define VMALLOC_SIZE (0x10000000) /* 256M */ -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) -#define VMALLOC_END (PAGE_OFFSET) +#define VMALLOC_START 0x70000000 +#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START) +#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) -/* Most of the architectures seem to be keeping some kind of padding between - * userspace TASK_SIZE and PAGE_OFFSET. i.e TASK_SIZE != PAGE_OFFSET. - */ #define USER_KERNEL_GUTTER 0x10000000 -/* User address space: - * On ARC700, CPU allows the entire lower half of 32 bit address space to be - * translated. Thus potentially 2G (0:0x7FFF_FFFF) could be User vaddr space. - * However we steal 256M for kernel addr (0x7000_0000:0x7FFF_FFFF) and another - * 256M (0x6000_0000:0x6FFF_FFFF) is gutter between user/kernel spaces - * Thus total User vaddr space is (0:0x5FFF_FFFF) - */ -#define TASK_SIZE (PAGE_OFFSET - VMALLOC_SIZE - USER_KERNEL_GUTTER) +#define TASK_SIZE (VMALLOC_START - USER_KERNEL_GUTTER) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 1bfeec2c0558..91755972b9a2 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -16,6 +16,7 @@ /* THE pt_regs: Defines how regs are saved during entry into kernel */ +#ifdef CONFIG_ISA_ARCOMPACT struct pt_regs { /* Real registers */ @@ -56,6 +57,48 @@ struct pt_regs { long user_r25; }; +#else + +struct pt_regs { + + long orig_r0; + + union { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned long state:8, ecr_vec:8, + ecr_cause:8, ecr_param:8; +#else + unsigned long ecr_param:8, ecr_cause:8, + ecr_vec:8, state:8; +#endif + }; + unsigned long event; + }; + + long bta; /* bta_l1, bta_l2, erbta */ + + long user_r25; + + long r26; /* gp */ + long fp; + long sp; /* user/kernel sp depending on where we came from */ + + long r12; + + /*------- Below list auto saved by h/w -----------*/ + long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; + + long blink; + long lp_end, lp_start, lp_count; + + long ei, ldi, jli; + + long ret; + long status32; +}; + +#endif /* Callee saved registers - need to be saved only when you are scheduled out */ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index b6a8c2dfbe6e..e1651df6a93d 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -22,24 +22,46 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + /* + * This smp_mb() is technically superfluous, we only need the one + * after the lock for providing the ACQUIRE semantics. + * However doing the "right" thing was regressing hackbench + * so keeping this, pending further investigation + */ + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" " breq %0, %2, 1b \n" : "+&r" (tmp) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "memory"); + + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; + smp_mb(); + __asm__ __volatile__( "1: ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + smp_mb(); + return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); } @@ -47,12 +69,22 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; + /* + * RELEASE barrier: given the instructions avail on ARCv2, full barrier + * is the only option + */ + smp_mb(); + __asm__ __volatile__( " ex %0, [%1] \n" : "+r" (tmp) : "r"(&(lock->slock)) : "memory"); + /* + * superfluous, but keeping for now - see pairing version in + * arch_spin_lock above + */ smp_mb(); } diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index aca0d5a45c7b..3af67455659a 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -25,6 +25,7 @@ #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 30c9baffa96f..d1da6032b715 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) static inline long __arc_strncpy_from_user(char *dst, const char __user *src, long count) { - long res = count; + long res = 0; char val; - unsigned int hw_count; if (count == 0) return 0; __asm__ __volatile__( - " lp 2f \n" + " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" - " breq.d %3, 0, 2f \n" + " breq.d %3, 0, 3f \n" " stb.ab %3, [%1, 1] \n" - "2: sub %0, %6, %4 \n" - "3: ;nop \n" + " add %0, %0, 1 # Num of NON NULL bytes copied \n" + "3: \n" " .section .fixup, \"ax\" \n" " .align 4 \n" - "4: mov %0, %5 \n" + "4: mov %0, %4 # sets @res as -EFAULT \n" " j 3b \n" " .previous \n" " .section __ex_table, \"a\" \n" " .align 4 \n" " .word 1b, 4b \n" " .previous \n" - : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count) - : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */ + : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) + : "g"(-EFAULT), "l"(count) : "memory"); return res; diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index e5d41e08240c..9d129a2a1351 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -30,7 +30,7 @@ #define PAGE_OFFSET (0x80000000) #else #define PAGE_SIZE (1UL << PAGE_SHIFT) /* Default 8K */ -#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */ +#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */ #endif #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 113f2033da9f..e7f3625a19b5 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -8,12 +8,14 @@ # Pass UTS_MACHINE for user_regset definition CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o entry.o process.o +obj-y := arcksyms.o setup.o irq.o time.o reset.o ptrace.o process.o devtree.o obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o clk.o -obj-y += devtree.o +obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o +obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_ARC_MCIP) += mcip.o obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c index 6c3aa0edb9b5..ecaf34e9235c 100644 --- a/arch/arc/kernel/asm-offsets.c +++ b/arch/arc/kernel/asm-offsets.c @@ -37,6 +37,8 @@ int main(void) DEFINE(TASK_ACT_MM, offsetof(struct task_struct, active_mm)); DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); + DEFINE(TASK_PID, offsetof(struct task_struct, pid)); + DEFINE(TASK_COMM, offsetof(struct task_struct, comm)); DEFINE(MM_CTXT, offsetof(struct mm_struct, context)); DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); @@ -56,8 +58,11 @@ int main(void) DEFINE(PT_r5, offsetof(struct pt_regs, r5)); DEFINE(PT_r6, offsetof(struct pt_regs, r6)); DEFINE(PT_r7, offsetof(struct pt_regs, r7)); + DEFINE(PT_ret, offsetof(struct pt_regs, ret)); DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs)); DEFINE(SZ_PT_REGS, sizeof(struct pt_regs)); + DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25)); + return 0; } diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index e32b54abff51..7e844fd8213f 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -32,6 +32,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root) if (of_flat_dt_is_compatible(dt_root, "abilis,arc-tb10x")) arc_base_baud = core_clk/3; + else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp")) + arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x) */ else arc_base_baud = core_clk; } diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S new file mode 100644 index 000000000000..bd7105d3172f --- /dev/null +++ b/arch/arc/kernel/entry-arcv2.S @@ -0,0 +1,239 @@ +/* + * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> /* ARC_{EXTRY,EXIT} */ +#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,TRAP...} */ +#include <asm/errno.h> +#include <asm/arcregs.h> +#include <asm/irqflags.h> + + .cpu HS + +#define VECTOR .word + +;############################ Vector Table ################################# + + .section .vector,"a",@progbits + .align 4 + +# Initial 16 slots are Exception Vectors +VECTOR stext ; Restart Vector (jump to entry point) +VECTOR mem_service ; Mem exception +VECTOR instr_service ; Instrn Error +VECTOR EV_MachineCheck ; Fatal Machine check +VECTOR EV_TLBMissI ; Intruction TLB miss +VECTOR EV_TLBMissD ; Data TLB miss +VECTOR EV_TLBProtV ; Protection Violation +VECTOR EV_PrivilegeV ; Privilege Violation +VECTOR EV_SWI ; Software Breakpoint +VECTOR EV_Trap ; Trap exception +VECTOR EV_Extension ; Extn Instruction Exception +VECTOR EV_DivZero ; Divide by Zero +VECTOR EV_DCError ; Data Cache Error +VECTOR EV_Misaligned ; Misaligned Data Access +VECTOR reserved ; Reserved slots +VECTOR reserved ; Reserved slots + +# Begin Interrupt Vectors +VECTOR handle_interrupt ; (16) Timer0 +VECTOR handle_interrupt ; unused (Timer1) +VECTOR handle_interrupt ; unused (WDT) +VECTOR handle_interrupt ; (19) ICI (inter core interrupt) +VECTOR handle_interrupt +VECTOR handle_interrupt +VECTOR handle_interrupt +VECTOR handle_interrupt ; (23) End of fixed IRQs + +.rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8 + VECTOR handle_interrupt +.endr + + .section .text, "ax",@progbits + +res_service: ; processor restart + flag 0x1 ; not implemented + nop + nop + +reserved: ; processor restart + rtie ; jump to processor initializations + +;##################### Interrupt Handling ############################## + +ENTRY(handle_interrupt) + + INTERRUPT_PROLOGUE irq + + clri ; To make status32.IE agree with CPU internal state + + lr r0, [ICAUSE] + + mov blink, ret_from_exception + + b.d arch_do_IRQ + mov r1, sp + +END(handle_interrupt) + +;################### Non TLB Exception Handling ############################# + +ENTRY(EV_SWI) + flag 1 +END(EV_SWI) + +ENTRY(EV_DivZero) + flag 1 +END(EV_DivZero) + +ENTRY(EV_DCError) + flag 1 +END(EV_DCError) + +ENTRY(EV_Misaligned) + + EXCEPTION_PROLOGUE + + lr r0, [efa] ; Faulting Data address + mov r1, sp + + FAKE_RET_FROM_EXCPN + + SAVE_CALLEE_SAVED_USER + mov r2, sp ; callee_regs + + bl do_misaligned_access + + ; TBD: optimize - do this only if a callee reg was involved + ; either a dst of emulated LD/ST or src with address-writeback + RESTORE_CALLEE_SAVED_USER + + b ret_from_exception +END(EV_Misaligned) + +; --------------------------------------------- +; Protection Violation Exception Handler +; --------------------------------------------- + +ENTRY(EV_TLBProtV) + + EXCEPTION_PROLOGUE + + lr r0, [efa] ; Faulting Data address + mov r1, sp ; pt_regs + + FAKE_RET_FROM_EXCPN + + mov blink, ret_from_exception + b do_page_fault + +END(EV_TLBProtV) + +; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they +; need to call do_page_fault(). +; ECR in pt_regs provides whether access was R/W/X + +.global call_do_page_fault +.set call_do_page_fault, EV_TLBProtV + +;############# Common Handlers for ARCompact and ARCv2 ############## + +#include "entry.S" + +;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ############## +; +; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) +; IRQ shd definitely not happen between now and rtie +; All 2 entry points to here already disable interrupts + +.Lrestore_regs: + + ld r0, [sp, PT_status32] ; U/K mode at time of entry + lr r10, [AUX_IRQ_ACT] + + bmsk r11, r10, 15 ; AUX_IRQ_ACT.ACTIVE + breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception + +;####### Return from Intr ####### + +debug_marker_l1: + bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot + +.Lisr_ret_fast_path: + ; Handle special case #1: (Entry via Exception, Return via IRQ) + ; + ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig + ; task now returning to U mode (riding the Intr) + ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP + ; won't be switched to correct U mode value (from AUX_SP) + ; So force AUX_IRQ_ACT.U for such a case + + btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U) + bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U + sr r11, [AUX_IRQ_ACT] + + INTERRUPT_EPILOGUE irq + rtie + +;####### Return from Exception / pure kernel mode ####### + +.Lexcept_ret: ; Expects r0 has PT_status32 + +debug_marker_syscall: + EXCEPTION_EPILOGUE + rtie + +;####### Return from Intr to insn in delay slot ####### + +; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ) +; +; Intr returning to a Delay Slot (DS) insn +; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig +; entry was via Exception in DS which got preempted in kernel). +; +; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling +.Lintr_ret_to_delay_slot: +debug_marker_ds: + + ld r2, [@intr_to_DE_cnt] + add r2, r2, 1 + st r2, [@intr_to_DE_cnt] + + ld r2, [sp, PT_ret] + ld r3, [sp, PT_status32] + + bic r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK + st r0, [sp, PT_status32] + + mov r1, .Lintr_ret_to_delay_slot_2 + st r1, [sp, PT_ret] + + st r2, [sp, 0] + st r3, [sp, 4] + + b .Lisr_ret_fast_path + +.Lintr_ret_to_delay_slot_2: + sub sp, sp, SZ_PT_REGS + st r9, [sp, -4] + + ld r9, [sp, 0] + sr r9, [eret] + + ld r9, [sp, 4] + sr r9, [erstatus] + + ld r9, [sp, 8] + sr r9, [erbta] + + ld r9, [sp, -4] + add sp, sp, SZ_PT_REGS + rtie + +END(ret_from_exception) diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S new file mode 100644 index 000000000000..15d457b4403a --- /dev/null +++ b/arch/arc/kernel/entry-compact.S @@ -0,0 +1,393 @@ +/* + * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: May 2011 + * -Userspace unaligned access emulation + * + * vineetg: Feb 2011 (ptrace low level code fixes) + * -traced syscall return code (r0) was not saved into pt_regs for restoring + * into user reg-file when traded task rets to user space. + * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs) + * were not invoking post-syscall trace hook (jumping directly into + * ret_from_system_call) + * + * vineetg: Nov 2010: + * -Vector table jumps (@8 bytes) converted into branches (@4 bytes) + * -To maintain the slot size of 8 bytes/vector, added nop, which is + * not executed at runtime. + * + * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) + * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well + * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't + * need ptregs anymore + * + * Vineetg: Oct 2009 + * -In a rare scenario, Process gets a Priv-V exception and gets scheduled + * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains + * active (AE bit enabled). This causes a double fault for a subseq valid + * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. + * Instr Error could also cause similar scenario, so same there as well. + * + * Vineetg: March 2009 (Supporting 2 levels of Interrupts) + * + * Vineetg: Aug 28th 2008: Bug #94984 + * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap + * Normally CPU does this automatically, however when doing FAKE rtie, + * we need to explicitly do this. The problem in macros + * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit + * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit, + * setting it and not clearing it clears ZOL context + * + * Vineetg: May 16th, 2008 + * - r25 now contains the Current Task when in kernel + * + * Vineetg: Dec 22, 2007 + * Minor Surgery of Low Level ISR to make it SMP safe + * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR + * - _current_task is made an array of NR_CPUS + * - Access of _current_task wrapped inside a macro so that if hardware + * team agrees for a dedicated reg, no other code is touched + * + * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 + */ + +#include <linux/errno.h> +#include <linux/linkage.h> /* {EXTRY,EXIT} */ +#include <asm/entry.h> +#include <asm/irqflags.h> + + .cpu A7 + +;############################ Vector Table ################################# + +.macro VECTOR lbl +#if 1 /* Just in case, build breaks */ + j \lbl +#else + b \lbl + nop +#endif +.endm + + .section .vector, "ax",@progbits + .align 4 + +/* Each entry in the vector table must occupy 2 words. Since it is a jump + * across sections (.vector to .text) we are gauranteed that 'j somewhere' + * will use the 'j limm' form of the intrsuction as long as somewhere is in + * a section other than .vector. + */ + +; ********* Critical System Events ********************** +VECTOR res_service ; 0x0, Restart Vector (0x0) +VECTOR mem_service ; 0x8, Mem exception (0x1) +VECTOR instr_service ; 0x10, Instrn Error (0x2) + +; ******************** Device ISRs ********************** +#ifdef CONFIG_ARC_IRQ3_LV2 +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +VECTOR handle_interrupt_level1 + +#ifdef CONFIG_ARC_IRQ5_LV2 +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +#ifdef CONFIG_ARC_IRQ6_LV2 +VECTOR handle_interrupt_level2 +#else +VECTOR handle_interrupt_level1 +#endif + +.rept 25 +VECTOR handle_interrupt_level1 ; Other devices +.endr + +/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ + +; ******************** Exceptions ********************** +VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) +VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21) +VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) +VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) + ; or Misaligned Access +VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) +VECTOR EV_Trap ; 0x128, Trap exception (0x25) +VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26) + +.rept 24 +VECTOR reserved ; Reserved Exceptions +.endr + + +;##################### Scratch Mem for IRQ stack switching ############# + +ARCFP_DATA int1_saved_reg + .align 32 + .type int1_saved_reg, @object + .size int1_saved_reg, 4 +int1_saved_reg: + .zero 4 + +/* Each Interrupt level needs its own scratch */ +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + +ARCFP_DATA int2_saved_reg + .type int2_saved_reg, @object + .size int2_saved_reg, 4 +int2_saved_reg: + .zero 4 + +#endif + +; --------------------------------------------- + .section .text, "ax",@progbits + +res_service: ; processor restart + flag 0x1 ; not implemented + nop + nop + +reserved: ; processor restart + rtie ; jump to processor initializations + +;##################### Interrupt Handling ############################## + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS +; --------------------------------------------- +; Level 2 ISR: Can interrupt a Level 1 ISR +; --------------------------------------------- +ENTRY(handle_interrupt_level2) + + INTERRUPT_PROLOGUE 2 + + ;------------------------------------------------------ + ; if L2 IRQ interrupted a L1 ISR, disable preemption + ;------------------------------------------------------ + + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) + bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal + + ; A1 is set in status32_l2 + ; bump thread_info->preempt_count (Disable preemption) + GET_CURR_THR_INFO_FROM_SP r10 + ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] + add r9, r9, 1 + st r9, [r10, THREAD_INFO_PREEMPT_COUNT] + +1: + ;------------------------------------------------------ + ; setup params for Linux common ISR and invoke it + ;------------------------------------------------------ + lr r0, [icause2] + and r0, r0, 0x1f + + bl.d @arch_do_IRQ + mov r1, sp + + mov r8,0x2 + sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg + + b ret_from_exception + +END(handle_interrupt_level2) + +#endif + +; --------------------------------------------- +; Level 1 ISR +; --------------------------------------------- +ENTRY(handle_interrupt_level1) + + INTERRUPT_PROLOGUE 1 + + lr r0, [icause1] + and r0, r0, 0x1f + +#ifdef CONFIG_TRACE_IRQFLAGS + ; icause1 needs to be read early, before calling tracing, which + ; can clobber scratch regs, hence use of stack to stash it + push r0 + TRACE_ASM_IRQ_DISABLE + pop r0 +#endif + + bl.d @arch_do_IRQ + mov r1, sp + + mov r8,0x1 + sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg + + b ret_from_exception +END(handle_interrupt_level1) + +;################### Non TLB Exception Handling ############################# + +; --------------------------------------------- +; Protection Violation Exception Handler +; --------------------------------------------- + +ENTRY(EV_TLBProtV) + + EXCEPTION_PROLOGUE + + lr r2, [ecr] + lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above) + + ; Exception auto-disables further Intr/exceptions. + ; Re-enable them by pretending to return from exception + ; (so rest of handler executes in pure K mode) + + FAKE_RET_FROM_EXCPN + + mov r1, sp ; Handle to pt_regs + + ;------ (5) Type of Protection Violation? ---------- + ; + ; ProtV Hardware Exception is triggered for Access Faults of 2 types + ; -Access Violaton : 00_23_(00|01|02|03)_00 + ; x r w r+w + ; -Unaligned Access : 00_23_04_00 + ; + bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f + + ;========= (6a) Access Violation Processing ======== + bl do_page_fault + b ret_from_exception + + ;========== (6b) Non aligned access ============ +4: + + SAVE_CALLEE_SAVED_USER + mov r2, sp ; callee_regs + + bl do_misaligned_access + + ; TBD: optimize - do this only if a callee reg was involved + ; either a dst of emulated LD/ST or src with address-writeback + RESTORE_CALLEE_SAVED_USER + + b ret_from_exception + +END(EV_TLBProtV) + +; Wrapper for Linux page fault handler called from EV_TLBMiss* +; Very similar to ProtV handler case (6a) above, but avoids the extra checks +; for Misaligned access +; +ENTRY(call_do_page_fault) + + EXCEPTION_PROLOGUE + lr r0, [efa] ; Faulting Data address + mov r1, sp + FAKE_RET_FROM_EXCPN + + mov blink, ret_from_exception + b do_page_fault + +END(call_do_page_fault) + +;############# Common Handlers for ARCompact and ARCv2 ############## + +#include "entry.S" + +;############# Return from Intr/Excp/Trap (ARC Specifics) ############## +; +; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) +; IRQ shd definitely not happen between now and rtie +; All 2 entry points to here already disable interrupts + +.Lrestore_regs: + + TRACE_ASM_IRQ_ENABLE + + lr r10, [status32] + + ; Restore REG File. In case multiple Events outstanding, + ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None + ; Note that we use realtime STATUS32 (not pt_regs->status32) to + ; decide that. + + ; if Returning from Exception + btst r10, STATUS_AE_BIT + bnz .Lexcep_ret + + ; Not Exception so maybe Interrupts (Level 1 or 2) + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS + + ; Level 2 interrupt return Path - from hardware standpoint + bbit0 r10, STATUS_A2_BIT, not_level2_interrupt + + ;------------------------------------------------------------------ + ; However the context returning might not have taken L2 intr itself + ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret + ; Special considerations needed for the context which took L2 intr + + ld r9, [sp, PT_event] ; Ensure this is L2 intr context + brne r9, event_IRQ2, 149f + + ;------------------------------------------------------------------ + ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier + ; so that sched doesn't move to new task, causing L1 to be delayed + ; undeterministically. Now that we've achieved that, let's reset + ; things to what they were, before returning from L2 context + ;---------------------------------------------------------------- + + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) + bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal + + ; decrement thread_info->preempt_count (re-enable preemption) + GET_CURR_THR_INFO_FROM_SP r10 + ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] + + ; paranoid check, given A1 was active when A2 happened, preempt count + ; must not be 0 because we would have incremented it. + ; If this does happen we simply HALT as it means a BUG !!! + cmp r9, 0 + bnz 2f + flag 1 + +2: + sub r9, r9, 1 + st r9, [r10, THREAD_INFO_PREEMPT_COUNT] + +149: + ;return from level 2 + INTERRUPT_EPILOGUE 2 +debug_marker_l2: + rtie + +not_level2_interrupt: + +#endif + + bbit0 r10, STATUS_A1_BIT, .Lpure_k_mode_ret + + ;return from level 1 + INTERRUPT_EPILOGUE 1 +debug_marker_l1: + rtie + +.Lexcep_ret: +.Lpure_k_mode_ret: + + ;this case is for syscalls or Exceptions or pure kernel mode + + EXCEPTION_EPILOGUE +debug_marker_syscall: + rtie + +END(ret_from_exception) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index d868289c5a26..f7a82fd4d601 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -1,60 +1,13 @@ /* - * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC + * Common Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARC + * (included from entry-<isa>.S * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * vineetg: May 2011 - * -Userspace unaligned access emulation - * - * vineetg: Feb 2011 (ptrace low level code fixes) - * -traced syscall return code (r0) was not saved into pt_regs for restoring - * into user reg-file when traded task rets to user space. - * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs) - * were not invoking post-syscall trace hook (jumping directly into - * ret_from_system_call) - * - * vineetg: Nov 2010: - * -Vector table jumps (@8 bytes) converted into branches (@4 bytes) - * -To maintain the slot size of 8 bytes/vector, added nop, which is - * not executed at runtime. - * - * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) - * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well - * -Wrappers for sys_{,rt_}sigsuspend() nolonger needed as they don't - * need ptregs anymore - * - * Vineetg: Oct 2009 - * -In a rare scenario, Process gets a Priv-V exception and gets scheduled - * out. Since we don't do FAKE RTIE for Priv-V, CPU excpetion state remains - * active (AE bit enabled). This causes a double fault for a subseq valid - * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. - * Instr Error could also cause similar scenario, so same there as well. - * - * Vineetg: March 2009 (Supporting 2 levels of Interrupts) - * - * Vineetg: Aug 28th 2008: Bug #94984 - * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap - * Normally CPU does this automatically, however when doing FAKE rtie, - * we need to explicitly do this. The problem in macros - * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit - * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit, - * setting it and not clearing it clears ZOL context - * - * Vineetg: May 16th, 2008 - * - r25 now contains the Current Task when in kernel - * - * Vineetg: Dec 22, 2007 - * Minor Surgery of Low Level ISR to make it SMP safe - * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR - * - _current_task is made an array of NR_CPUS - * - Access of _current_task wrapped inside a macro so that if hardware - * team agrees for a dedicated reg, no other code is touched - * - * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 */ /*------------------------------------------------------------------ @@ -67,206 +20,59 @@ * Global Pointer (gp) r26 * Frame Pointer (fp) r27 * Stack Pointer (sp) r28 - * Interrupt link register (ilink1) r29 - * Interrupt link register (ilink2) r30 * Branch link register (blink) r31 *------------------------------------------------------------------ */ - .cpu A7 - -;############################ Vector Table ################################# - -.macro VECTOR lbl -#if 1 /* Just in case, build breaks */ - j \lbl -#else - b \lbl - nop -#endif -.endm - - .section .vector, "ax",@progbits - .align 4 - -/* Each entry in the vector table must occupy 2 words. Since it is a jump - * across sections (.vector to .text) we are gauranteed that 'j somewhere' - * will use the 'j limm' form of the intrsuction as long as somewhere is in - * a section other than .vector. - */ - -; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Restart Vector (0x0) -VECTOR mem_service ; 0x8, Mem exception (0x1) -VECTOR instr_service ; 0x10, Instrn Error (0x2) - -; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_IRQ3_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -VECTOR handle_interrupt_level1 - -#ifdef CONFIG_ARC_IRQ5_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -#ifdef CONFIG_ARC_IRQ6_LV2 -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -.rept 25 -VECTOR handle_interrupt_level1 ; Other devices -.endr - -/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ - -; ******************** Exceptions ********************** -VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) -VECTOR EV_TLBMissI ; 0x108, Intruction TLB miss (0x21) -VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) -VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) - ; or Misaligned Access -VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) -VECTOR EV_Trap ; 0x128, Trap exception (0x25) -VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26) - -.rept 24 -VECTOR reserved ; Reserved Exceptions -.endr - -#include <linux/linkage.h> /* {EXTRY,EXIT} */ -#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,SYS...} */ -#include <asm/errno.h> -#include <asm/arcregs.h> -#include <asm/irqflags.h> - -;##################### Scratch Mem for IRQ stack switching ############# - -ARCFP_DATA int1_saved_reg - .align 32 - .type int1_saved_reg, @object - .size int1_saved_reg, 4 -int1_saved_reg: - .zero 4 - -/* Each Interrupt level needs its own scratch */ -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - -ARCFP_DATA int2_saved_reg - .type int2_saved_reg, @object - .size int2_saved_reg, 4 -int2_saved_reg: - .zero 4 - -#endif - -; --------------------------------------------- - .section .text, "ax",@progbits - -res_service: ; processor restart - flag 0x1 ; not implemented - nop - nop - -reserved: ; processor restart - rtie ; jump to processor initializations - -;##################### Interrupt Handling ############################## - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS -; --------------------------------------------- -; Level 2 ISR: Can interrupt a Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level2) +;################### Special Sys Call Wrappers ########################## - ; TODO-vineetg for SMP this wont work - ; free up r9 as scratchpad - st r9, [@int2_saved_reg] +ENTRY(sys_clone_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone + DISCARD_CALLEE_SAVED_USER - ;Which mode (user/kernel) was the system in when intr occured - lr r9, [status32_l2] + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit - SWITCH_TO_KERNEL_STK - SAVE_ALL_INT2 + b ret_from_system_call +END(sys_clone_wrapper) - ;------------------------------------------------------ - ; if L2 IRQ interrupted a L1 ISR, disable preemption - ;------------------------------------------------------ +ENTRY(ret_from_fork) + ; when the forked child comes here from the __switch_to function + ; r0 has the last task pointer. + ; put last task in scheduler queue + bl @schedule_tail - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal + ld r9, [sp, PT_status32] + brne r9, 0, 1f - ; A1 is set in status32_l2 - ; bump thread_info->preempt_count (Disable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - add r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] + jl.d [r14] ; kernel thread entry point + mov r0, r13 ; (see PF_KTHREAD block in copy_thread) 1: - ;------------------------------------------------------ - ; setup params for Linux common ISR and invoke it - ;------------------------------------------------------ - lr r0, [icause2] - and r0, r0, 0x1f - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x2 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception - -END(handle_interrupt_level2) - -#endif - -; --------------------------------------------- -; Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level1) - - /* free up r9 as scratchpad */ -#ifdef CONFIG_SMP - sr r9, [ARC_REG_SCRATCH_DATA0] -#else - st r9, [@int1_saved_reg] -#endif - - ;Which mode (user/kernel) was the system in when intr occured - lr r9, [status32_l1] - - SWITCH_TO_KERNEL_STK - SAVE_ALL_INT1 + ; Return to user space + ; 1. Any forked task (Reach here via BRne above) + ; 2. First ever init task (Reach here via return from JL above) + ; This is the historic "kernel_execve" use-case, to return to init + ; user mode, in a round about way since that is always done from + ; a kernel thread which is executed via JL above but always returns + ; out whenever kernel_execve (now inline do_fork()) is involved + b ret_from_exception +END(ret_from_fork) - lr r0, [icause1] - and r0, r0, 0x1f +#ifdef CONFIG_ARC_DW2_UNWIND +; Workaround for bug 94179 (STAR ): +; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder +; section (.debug_frame) as loadable. So we force it here. +; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag) +; would not work after a clean build due to kernel build system dependencies. +.section .debug_frame, "wa",@progbits -#ifdef CONFIG_TRACE_IRQFLAGS - ; icause1 needs to be read early, before calling tracing, which - ; can clobber scratch regs, hence use of stack to stash it - push r0 - TRACE_ASM_IRQ_DISABLE - pop r0 +; Reset to .text as this file is included in entry-<isa>.S +.section .text, "ax",@progbits #endif - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x1 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception -END(handle_interrupt_level1) - ;################### Non TLB Exception Handling ############################# ; --------------------------------------------- @@ -280,7 +86,7 @@ ENTRY(instr_service) lr r0, [efa] mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN bl do_insterror_or_kprobe b ret_from_exception @@ -297,7 +103,7 @@ ENTRY(mem_service) lr r0, [efa] mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN bl do_memory_error b ret_from_exception @@ -334,60 +140,6 @@ ENTRY(EV_MachineCheck) END(EV_MachineCheck) ; --------------------------------------------- -; Protection Violation Exception Handler -; --------------------------------------------- - -ENTRY(EV_TLBProtV) - - EXCEPTION_PROLOGUE - - ;---------(3) Save some more regs----------------- - ; vineetg: Mar 6th: Random Seg Fault issue #1 - ; ecr and efa were not saved in case an Intr sneaks in - ; after fake rtie - - lr r2, [ecr] - lr r0, [efa] ; Faulting Data address - - ; --------(4) Return from CPU Exception Mode --------- - ; Fake a rtie, but rtie to next label - ; That way, subsequently, do_page_fault ( ) executes in pure kernel - ; mode with further Exceptions enabled - - FAKE_RET_FROM_EXCPN r9 - - mov r1, sp - - ;------ (5) Type of Protection Violation? ---------- - ; - ; ProtV Hardware Exception is triggered for Access Faults of 2 types - ; -Access Violaton : 00_23_(00|01|02|03)_00 - ; x r w r+w - ; -Unaligned Access : 00_23_04_00 - ; - bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f - - ;========= (6a) Access Violation Processing ======== - bl do_page_fault - b ret_from_exception - - ;========== (6b) Non aligned access ============ -4: - - SAVE_CALLEE_SAVED_USER - mov r2, sp ; callee_regs - - bl do_misaligned_access - - ; TBD: optimize - do this only if a callee reg was involved - ; either a dst of emulated LD/ST or src with address-writeback - RESTORE_CALLEE_SAVED_USER - - b ret_from_exception - -END(EV_TLBProtV) - -; --------------------------------------------- ; Privilege Violation Exception Handler ; --------------------------------------------- ENTRY(EV_PrivilegeV) @@ -397,7 +149,7 @@ ENTRY(EV_PrivilegeV) lr r0, [efa] mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN bl do_privilege_fault b ret_from_exception @@ -413,14 +165,17 @@ ENTRY(EV_Extension) lr r0, [efa] mov r1, sp - FAKE_RET_FROM_EXCPN r9 + FAKE_RET_FROM_EXCPN bl do_extension_fault b ret_from_exception END(EV_Extension) -;######################### System Call Tracing ######################### +;################ Trap Handling (Syscall, Breakpoint) ################## +; --------------------------------------------- +; syscall Tracing +; --------------------------------------------- tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed @@ -463,10 +218,9 @@ tracesys_exit: b ret_from_exception ; NOT ret_from_system_call at is saves r0 which ; we'd done before calling post hook above -;################### Break Point TRAP ########################## - - ; ======= (5b) Trap is due to Break-Point ========= - +; --------------------------------------------- +; Breakpoint TRAP +; --------------------------------------------- trap_with_param: ; stop_pc info by gdb needs this info @@ -475,7 +229,7 @@ trap_with_param: ; Now that we have read EFA, it is safe to do "fake" rtie ; and get out of CPU exception mode - FAKE_RET_FROM_EXCPN r11 + FAKE_RET_FROM_EXCPN ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File @@ -494,37 +248,33 @@ trap_with_param: b ret_from_exception -;##################### Trap Handling ############################## -; -; EV_Trap caused by TRAP_S and TRAP0 instructions. -;------------------------------------------------------------------ -; (1) System Calls -; :parameters in r0-r7. -; :r8 has the system call number -; (2) Break Points -;------------------------------------------------------------------ +; --------------------------------------------- +; syscall TRAP +; ABI: (r0-r7) upto 8 args, (r8) syscall number +; --------------------------------------------- ENTRY(EV_Trap) EXCEPTION_PROLOGUE - ;------- (4) What caused the Trap -------------- - lr r12, [ecr] - bmsk.f 0, r12, 7 + ;============ TRAP 1 :breakpoints + ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR) + bmsk.f 0, r9, 7 bnz trap_with_param - ; ======= (5a) Trap is due to System Call ======== + ;============ TRAP (no param): syscall top level - ; Before doing anything, return from CPU Exception Mode - FAKE_RET_FROM_EXCPN r11 + ; First return from Exception to pure K mode (Exception/IRQs renabled) + FAKE_RET_FROM_EXCPN - ; If syscall tracing ongoing, invoke pre-pos-hooks + ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE bnz tracesys ; this never comes back - ;============ This is normal System Call case ========== - ; Sys-call num shd not exceed the total system calls avail + ;============ Normal syscall case + + ; syscall num shd not exceed the total system calls avail cmp r8, NR_syscalls mov.hi r0, -ENOSYS bhi ret_from_system_call @@ -565,7 +315,7 @@ resume_user_mode_begin: ; Fast Path return to user mode if no pending work GET_CURR_THR_INFO_FLAGS r9 and.f 0, r9, _TIF_WORK_MASK - bz restore_regs + bz .Lrestore_regs ; --- (Slow Path #1) task preemption --- bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals @@ -624,11 +374,11 @@ resume_kernel_mode: ; Can't preempt if preemption disabled GET_CURR_THR_INFO_FROM_SP r10 ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] - brne r8, 0, restore_regs + brne r8, 0, .Lrestore_regs ; check if this task's NEED_RESCHED flag set ld r9, [r10, THREAD_INFO_FLAGS] - bbit0 r9, TIF_NEED_RESCHED, restore_regs + bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs ; Invoke PREEMPTION bl preempt_schedule_irq @@ -636,142 +386,7 @@ resume_kernel_mode: ; preempt_schedule_irq() always returns with IRQ disabled #endif - ; fall through - -;############# Return from Intr/Excp/Trap (ARC Specifics) ############## -; -; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) -; IRQ shd definitely not happen between now and rtie -; All 2 entry points to here already disable interrupts - -restore_regs : - - TRACE_ASM_IRQ_ENABLE - - lr r10, [status32] - - ; Restore REG File. In case multiple Events outstanding, - ; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None - ; Note that we use realtime STATUS32 (not pt_regs->status32) to - ; decide that. - - ; if Returning from Exception - bbit0 r10, STATUS_AE_BIT, not_exception - RESTORE_ALL_SYS - rtie - - ; Not Exception so maybe Interrupts (Level 1 or 2) - -not_exception: - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - - ; Level 2 interrupt return Path - from hardware standpoint - bbit0 r10, STATUS_A2_BIT, not_level2_interrupt - - ;------------------------------------------------------------------ - ; However the context returning might not have taken L2 intr itself - ; e.g. Task'A' user-code -> L2 intr -> schedule -> 'B' user-code ret - ; Special considerations needed for the context which took L2 intr - - ld r9, [sp, PT_event] ; Ensure this is L2 intr context - brne r9, event_IRQ2, 149f - - ;------------------------------------------------------------------ - ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier - ; so that sched doesn't move to new task, causing L1 to be delayed - ; undeterministically. Now that we've achieved that, let's reset - ; things to what they were, before returning from L2 context - ;---------------------------------------------------------------- - - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal - - ; decrement thread_info->preempt_count (re-enable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - - ; paranoid check, given A1 was active when A2 happened, preempt count - ; must not be 0 because we would have incremented it. - ; If this does happen we simply HALT as it means a BUG !!! - cmp r9, 0 - bnz 2f - flag 1 - -2: - sub r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] - -149: - ;return from level 2 - RESTORE_ALL_INT2 -debug_marker_l2: - rtie - -not_level2_interrupt: - -#endif - - bbit0 r10, STATUS_A1_BIT, not_level1_interrupt + b .Lrestore_regs - ;return from level 1 +##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-<isa>.S - RESTORE_ALL_INT1 -debug_marker_l1: - rtie - -not_level1_interrupt: - - ;this case is for syscalls or Exceptions (with fake rtie) - - RESTORE_ALL_SYS -debug_marker_syscall: - rtie - -END(ret_from_exception) - -ENTRY(ret_from_fork) - ; when the forked child comes here from the __switch_to function - ; r0 has the last task pointer. - ; put last task in scheduler queue - bl @schedule_tail - - ld r9, [sp, PT_status32] - brne r9, 0, 1f - - jl.d [r14] ; kernel thread entry point - mov r0, r13 ; (see PF_KTHREAD block in copy_thread) - -1: - ; Return to user space - ; 1. Any forked task (Reach here via BRne above) - ; 2. First ever init task (Reach here via return from JL above) - ; This is the historic "kernel_execve" use-case, to return to init - ; user mode, in a round about way since that is always done from - ; a kernel thread which is executed via JL above but always returns - ; out whenever kernel_execve (now inline do_fork()) is involved - b ret_from_exception -END(ret_from_fork) - -;################### Special Sys Call Wrappers ########################## - -ENTRY(sys_clone_wrapper) - SAVE_CALLEE_SAVED_USER - bl @sys_clone - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b ret_from_system_call -END(sys_clone_wrapper) - -#ifdef CONFIG_ARC_DW2_UNWIND -; Workaround for bug 94179 (STAR ): -; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder -; section (.debug_frame) as loadable. So we force it here. -; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag) -; would not work after a clean build due to kernel build system dependencies. -.section .debug_frame, "wa",@progbits -#endif diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index b0e8666fdccc..812f95e6ae69 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -49,8 +49,6 @@ 1: .endm - .cpu A7 - .section .init.text, "ax",@progbits .type stext, @function .globl stext @@ -83,6 +81,7 @@ stext: st.ab 0, [r5, 4] 1: +#ifdef CONFIG_ARC_UBOOT_SUPPORT ; Uboot - kernel ABI ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 ; r1 = magic number (board identity, unused as of now @@ -90,6 +89,7 @@ stext: ; These are handled later in setup_arch() st r0, [@uboot_tag] st r2, [@uboot_arg] +#endif ; setup "current" tsk and optionally cache it in dedicated r25 mov r9, @init_task diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c new file mode 100644 index 000000000000..6208c630abed --- /dev/null +++ b/arch/arc/kernel/intc-arcv2.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/irqdomain.h> +#include <linux/irqchip.h> +#include "../../drivers/irqchip/irqchip.h" +#include <asm/irq.h> + +/* + * Early Hardware specific Interrupt setup + * -Called very early (start_kernel -> setup_arch -> setup_processor) + * -Platform Independent (must for any ARC Core) + * -Needed for each CPU (hence not foldable into init_IRQ) + */ +void arc_init_IRQ(void) +{ + unsigned int tmp; + + struct aux_irq_ctrl { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int res3:18, save_idx_regs:1, res2:1, + save_u_to_u:1, save_lp_regs:1, save_blink:1, + res:4, save_nr_gpr_pairs:5; +#else + unsigned int save_nr_gpr_pairs:5, res:4, + save_blink:1, save_lp_regs:1, save_u_to_u:1, + res2:1, save_idx_regs:1, res3:18; +#endif + } ictrl; + + *(unsigned int *)&ictrl = 0; + + ictrl.save_nr_gpr_pairs = 6; /* r0 to r11 (r12 saved manually) */ + ictrl.save_blink = 1; + ictrl.save_lp_regs = 1; /* LP_COUNT, LP_START, LP_END */ + ictrl.save_u_to_u = 0; /* user ctxt saved on kernel stack */ + ictrl.save_idx_regs = 1; /* JLI, LDI, EI */ + + WRITE_AUX(AUX_IRQ_CTRL, ictrl); + + /* setup status32, don't enable intr yet as kernel doesn't want */ + tmp = read_aux_reg(0xa); + tmp |= ISA_INIT_STATUS_BITS; + tmp &= ~STATUS_IE_MASK; + asm volatile("flag %0 \n"::"r"(tmp)); + + /* + * ARCv2 core intc provides multiple interrupt priorities (upto 16). + * Typical builds though have only two levels (0-high, 1-low) + * Linux by default uses lower prio 1 for most irqs, reserving 0 for + * NMI style interrupts in future (say perf) + * + * Read the intc BCR to confirm that Linux default priority is avail + * in h/w + * + * Note: + * IRQ_BCR[27..24] contains N-1 (for N priority levels) and prio level + * is 0 based. + */ + tmp = (read_aux_reg(ARC_REG_IRQ_BCR) >> 24 ) & 0xF; + if (ARCV2_IRQ_DEF_PRIO > tmp) + panic("Linux default irq prio incorrect\n"); +} + +static void arcv2_irq_mask(struct irq_data *data) +{ + write_aux_reg(AUX_IRQ_SELECT, data->irq); + write_aux_reg(AUX_IRQ_ENABLE, 0); +} + +static void arcv2_irq_unmask(struct irq_data *data) +{ + write_aux_reg(AUX_IRQ_SELECT, data->irq); + write_aux_reg(AUX_IRQ_ENABLE, 1); +} + +void arcv2_irq_enable(struct irq_data *data) +{ + /* set default priority */ + write_aux_reg(AUX_IRQ_SELECT, data->irq); + write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); + + /* + * hw auto enables (linux unmask) all by default + * So no need to do IRQ_ENABLE here + * XXX: However OSCI LAN need it + */ + write_aux_reg(AUX_IRQ_ENABLE, 1); +} + +static struct irq_chip arcv2_irq_chip = { + .name = "ARCv2 core Intc", + .irq_mask = arcv2_irq_mask, + .irq_unmask = arcv2_irq_unmask, + .irq_enable = arcv2_irq_enable +}; + +static int arcv2_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + if (irq == TIMER0_IRQ || irq == IPI_IRQ) + irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq); + else + irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq); + + return 0; +} + +static const struct irq_domain_ops arcv2_irq_ops = { + .xlate = irq_domain_xlate_onecell, + .map = arcv2_irq_map, +}; + +static struct irq_domain *root_domain; + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0, + &arcv2_irq_ops, NULL); + + if (!root_domain) + panic("root irq domain not avail\n"); + + /* with this we don't need to export root_domain */ + irq_set_default_host(root_domain); + + return 0; +} + +IRQCHIP_DECLARE(arc_intc, "snps,archs-intc", init_onchip_IRQ); diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c new file mode 100644 index 000000000000..fcdddb631766 --- /dev/null +++ b/arch/arc/kernel/intc-compact.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2011-12 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/irqdomain.h> +#include <linux/irqchip.h> +#include "../../drivers/irqchip/irqchip.h" +#include <asm/irq.h> + +/* + * Early Hardware specific Interrupt setup + * -Platform independent, needed for each CPU (not foldable into init_IRQ) + * -Called very early (start_kernel -> setup_arch -> setup_processor) + * + * what it does ? + * -Optionally, setup the High priority Interrupts as Level 2 IRQs + */ +void arc_init_IRQ(void) +{ + int level_mask = 0; + + /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; + level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; + + /* + * Write to register, even if no LV2 IRQs configured to reset it + * in case bootloader had mucked with it + */ + write_aux_reg(AUX_IRQ_LEV, level_mask); + + if (level_mask) + pr_info("Level-2 interrupts bitset %x\n", level_mask); +} + +/* + * ARC700 core includes a simple on-chip intc supporting + * -per IRQ enable/disable + * -2 levels of interrupts (high/low) + * -all interrupts being level triggered + * + * To reduce platform code, we assume all IRQs directly hooked-up into intc. + * Platforms with external intc, hence cascaded IRQs, are free to over-ride + * below, per IRQ. + */ + +static void arc_irq_mask(struct irq_data *data) +{ + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb &= ~(1 << data->irq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static void arc_irq_unmask(struct irq_data *data) +{ + unsigned int ienb; + + ienb = read_aux_reg(AUX_IENABLE); + ienb |= (1 << data->irq); + write_aux_reg(AUX_IENABLE, ienb); +} + +static struct irq_chip onchip_intc = { + .name = "ARC In-core Intc", + .irq_mask = arc_irq_mask, + .irq_unmask = arc_irq_unmask, +}; + +static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* + * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core + * code doesn't own it (like TIMER0). ISS IDU / ezchip define it + * in platform header which can't be included here as it goes + * against multi-platform image philisophy + */ + if (irq == TIMER0_IRQ) + irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); + else + irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); + + return 0; +} + +static const struct irq_domain_ops arc_intc_domain_ops = { + .xlate = irq_domain_xlate_onecell, + .map = arc_intc_domain_map, +}; + +static struct irq_domain *root_domain; + +static int __init +init_onchip_IRQ(struct device_node *intc, struct device_node *parent) +{ + if (parent) + panic("DeviceTree incore intc not a root irq controller\n"); + + root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0, + &arc_intc_domain_ops, NULL); + + if (!root_domain) + panic("root irq domain not avail\n"); + + /* with this we don't need to export root_domain */ + irq_set_default_host(root_domain); + + return 0; +} + +IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); + +/* + * arch_local_irq_enable - Enable interrupts. + * + * 1. Explicitly called to re-enable interrupts + * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc + * which maybe in hard ISR itself + * + * Semantics of this function change depending on where it is called from: + * + * -If called from hard-ISR, it must not invert interrupt priorities + * e.g. suppose TIMER is high priority (Level 2) IRQ + * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. + * Here local_irq_enable( ) shd not re-enable lower priority interrupts + * -If called from soft-ISR, it must re-enable all interrupts + * soft ISR are low prioity jobs which can be very slow, thus all IRQs + * must be enabled while they run. + * Now hardware context wise we may still be in L2 ISR (not done rtie) + * still we must re-enable both L1 and L2 IRQs + * Another twist is prev scenario with flow being + * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR + * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get + * over-written (this is deficiency in ARC700 Interrupt mechanism) + */ + +#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */ + +void arch_local_irq_enable(void) +{ + + unsigned long flags = arch_local_save_flags(); + + /* Allow both L1 and L2 at the onset */ + flags |= (STATUS_E1_MASK | STATUS_E2_MASK); + + /* Called from hard ISR (between irq_enter and irq_exit) */ + if (in_irq()) { + + /* If in L2 ISR, don't re-enable any further IRQs as this can + * cause IRQ priorities to get upside down. e.g. it could allow + * L1 be taken while in L2 hard ISR which is wrong not only in + * theory, it can also cause the dreaded L1-L2-L1 scenario + */ + if (flags & STATUS_A2_MASK) + flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); + + /* Even if in L1 ISR, allowe Higher prio L2 IRQs */ + else if (flags & STATUS_A1_MASK) + flags &= ~(STATUS_E1_MASK); + } + + /* called from soft IRQ, ideally we want to re-enable all levels */ + + else if (in_softirq()) { + + /* However if this is case of L1 interrupted by L2, + * re-enabling both may cause whaco L1-L2-L1 scenario + * because ARC700 allows level 1 to interrupt an active L2 ISR + * Thus we disable both + * However some code, executing in soft ISR wants some IRQs + * to be enabled so we re-enable L2 only + * + * How do we determine L1 intr by L2 + * -A2 is set (means in L2 ISR) + * -E1 is set in this ISR's pt_regs->status32 which is + * saved copy of status32_l2 when l2 ISR happened + */ + struct pt_regs *pt = get_irq_regs(); + + if ((flags & STATUS_A2_MASK) && pt && + (pt->status32 & STATUS_A1_MASK)) { + /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */ + flags &= ~(STATUS_E1_MASK); + } + } + + arch_local_irq_restore(flags); +} + +#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */ + +/* + * Simpler version for only 1 level of interrupt + * Here we only Worry about Level 1 Bits + */ +void arch_local_irq_enable(void) +{ + unsigned long flags; + + /* + * ARC IDE Drivers tries to re-enable interrupts from hard-isr + * context which is simply wrong + */ + if (in_irq()) { + WARN_ONCE(1, "IRQ enabled from hard-isr"); + return; + } + + flags = arch_local_save_flags(); + flags |= (STATUS_E1_MASK | STATUS_E2_MASK); + arch_local_irq_restore(flags); +} +#endif +EXPORT_SYMBOL(arch_local_irq_enable); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 620ec2fe32a9..2989a7bcf8a8 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -8,116 +8,10 @@ */ #include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/irqdomain.h> #include <linux/irqchip.h> -#include "../../drivers/irqchip/irqchip.h" -#include <asm/sections.h> -#include <asm/irq.h> #include <asm/mach_desc.h> /* - * Early Hardware specific Interrupt setup - * -Platform independent, needed for each CPU (not foldable into init_IRQ) - * -Called very early (start_kernel -> setup_arch -> setup_processor) - * - * what it does ? - * -Optionally, setup the High priority Interrupts as Level 2 IRQs - */ -void arc_init_IRQ(void) -{ - int level_mask = 0; - - /* setup any high priority Interrupts (Level2 in ARCompact jargon) */ - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ3_LV2) << 3; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5; - level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6; - - /* - * Write to register, even if no LV2 IRQs configured to reset it - * in case bootloader had mucked with it - */ - write_aux_reg(AUX_IRQ_LEV, level_mask); - - if (level_mask) - pr_info("Level-2 interrupts bitset %x\n", level_mask); -} - -/* - * ARC700 core includes a simple on-chip intc supporting - * -per IRQ enable/disable - * -2 levels of interrupts (high/low) - * -all interrupts being level triggered - * - * To reduce platform code, we assume all IRQs directly hooked-up into intc. - * Platforms with external intc, hence cascaded IRQs, are free to over-ride - * below, per IRQ. - */ - -static void arc_irq_mask(struct irq_data *data) -{ - unsigned int ienb; - - ienb = read_aux_reg(AUX_IENABLE); - ienb &= ~(1 << data->irq); - write_aux_reg(AUX_IENABLE, ienb); -} - -static void arc_irq_unmask(struct irq_data *data) -{ - unsigned int ienb; - - ienb = read_aux_reg(AUX_IENABLE); - ienb |= (1 << data->irq); - write_aux_reg(AUX_IENABLE, ienb); -} - -static struct irq_chip onchip_intc = { - .name = "ARC In-core Intc", - .irq_mask = arc_irq_mask, - .irq_unmask = arc_irq_unmask, -}; - -static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - if (irq == TIMER0_IRQ) - irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); - else - irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - - return 0; -} - -static const struct irq_domain_ops arc_intc_domain_ops = { - .xlate = irq_domain_xlate_onecell, - .map = arc_intc_domain_map, -}; - -static struct irq_domain *root_domain; - -static int __init -init_onchip_IRQ(struct device_node *intc, struct device_node *parent) -{ - if (parent) - panic("DeviceTree incore intc not a root irq controller\n"); - - root_domain = irq_domain_add_legacy(intc, NR_CPU_IRQS, 0, 0, - &arc_intc_domain_ops, NULL); - - if (!root_domain) - panic("root irq domain not avail\n"); - - /* with this we don't need to export root_domain */ - irq_set_default_host(root_domain); - - return 0; -} - -IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); - -/* * Late Interrupt system init called from start_kernel for Boot CPU only * * Since slab must already be initialized, platforms can start doing any @@ -178,107 +72,3 @@ void arc_request_percpu_irq(int irq, int cpu, enable_percpu_irq(irq, 0); } - -/* - * arch_local_irq_enable - Enable interrupts. - * - * 1. Explicitly called to re-enable interrupts - * 2. Implicitly called from spin_unlock_irq, write_unlock_irq etc - * which maybe in hard ISR itself - * - * Semantics of this function change depending on where it is called from: - * - * -If called from hard-ISR, it must not invert interrupt priorities - * e.g. suppose TIMER is high priority (Level 2) IRQ - * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. - * Here local_irq_enable( ) shd not re-enable lower priority interrupts - * -If called from soft-ISR, it must re-enable all interrupts - * soft ISR are low prioity jobs which can be very slow, thus all IRQs - * must be enabled while they run. - * Now hardware context wise we may still be in L2 ISR (not done rtie) - * still we must re-enable both L1 and L2 IRQs - * Another twist is prev scenario with flow being - * L1 ISR ==> interrupted by L2 ISR ==> L2 soft ISR - * here we must not re-enable Ll as prev Ll Interrupt's h/w context will get - * over-written (this is deficiency in ARC700 Interrupt mechanism) - */ - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS /* Complex version for 2 IRQ levels */ - -void arch_local_irq_enable(void) -{ - - unsigned long flags; - flags = arch_local_save_flags(); - - /* Allow both L1 and L2 at the onset */ - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - - /* Called from hard ISR (between irq_enter and irq_exit) */ - if (in_irq()) { - - /* If in L2 ISR, don't re-enable any further IRQs as this can - * cause IRQ priorities to get upside down. e.g. it could allow - * L1 be taken while in L2 hard ISR which is wrong not only in - * theory, it can also cause the dreaded L1-L2-L1 scenario - */ - if (flags & STATUS_A2_MASK) - flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); - - /* Even if in L1 ISR, allowe Higher prio L2 IRQs */ - else if (flags & STATUS_A1_MASK) - flags &= ~(STATUS_E1_MASK); - } - - /* called from soft IRQ, ideally we want to re-enable all levels */ - - else if (in_softirq()) { - - /* However if this is case of L1 interrupted by L2, - * re-enabling both may cause whaco L1-L2-L1 scenario - * because ARC700 allows level 1 to interrupt an active L2 ISR - * Thus we disable both - * However some code, executing in soft ISR wants some IRQs - * to be enabled so we re-enable L2 only - * - * How do we determine L1 intr by L2 - * -A2 is set (means in L2 ISR) - * -E1 is set in this ISR's pt_regs->status32 which is - * saved copy of status32_l2 when l2 ISR happened - */ - struct pt_regs *pt = get_irq_regs(); - if ((flags & STATUS_A2_MASK) && pt && - (pt->status32 & STATUS_A1_MASK)) { - /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */ - flags &= ~(STATUS_E1_MASK); - } - } - - arch_local_irq_restore(flags); -} - -#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */ - -/* - * Simpler version for only 1 level of interrupt - * Here we only Worry about Level 1 Bits - */ -void arch_local_irq_enable(void) -{ - unsigned long flags; - - /* - * ARC IDE Drivers tries to re-enable interrupts from hard-isr - * context which is simply wrong - */ - if (in_irq()) { - WARN_ONCE(1, "IRQ enabled from hard-isr"); - return; - } - - flags = arch_local_save_flags(); - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - arch_local_irq_restore(flags); -} -#endif -EXPORT_SYMBOL(arch_local_irq_enable); diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c new file mode 100644 index 000000000000..30284e8de6ff --- /dev/null +++ b/arch/arc/kernel/mcip.c @@ -0,0 +1,341 @@ +/* + * ARC ARConnect (MultiCore IP) support (formerly known as MCIP) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/smp.h> +#include <linux/irq.h> +#include <linux/spinlock.h> +#include <asm/mcip.h> + +static char smp_cpuinfo_buf[128]; +static int idu_detected; + +static DEFINE_RAW_SPINLOCK(mcip_lock); + +/* + * Any SMP specific init any CPU does when it comes up. + * Here we setup the CPU to enable Inter-Processor-Interrupts + * Called for each CPU + * -Master : init_IRQ() + * -Other(s) : start_kernel_secondary() + */ +void mcip_init_smp(unsigned int cpu) +{ + smp_ipi_irq_setup(cpu, IPI_IRQ); +} + +static void mcip_ipi_send(int cpu) +{ + unsigned long flags; + int ipi_was_pending; + + /* + * NOTE: We must spin here if the other cpu hasn't yet + * serviced a previous message. This can burn lots + * of time, but we MUST follows this protocol or + * ipi messages can be lost!!! + * Also, we must release the lock in this loop because + * the other side may get to this same loop and not + * be able to ack -- thus causing deadlock. + */ + + do { + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu); + ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK); + if (ipi_was_pending == 0) + break; /* break out but keep lock */ + raw_spin_unlock_irqrestore(&mcip_lock, flags); + } while (1); + + __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu); + raw_spin_unlock_irqrestore(&mcip_lock, flags); + +#ifdef CONFIG_ARC_IPI_DBG + if (ipi_was_pending) + pr_info("IPI ACK delayed from cpu %d\n", cpu); +#endif +} + +static void mcip_ipi_clear(int irq) +{ + unsigned int cpu, c; + unsigned long flags; + unsigned int __maybe_unused copy; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* Who sent the IPI */ + __mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0); + + copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */ + + /* + * In rare case, multiple concurrent IPIs sent to same target can + * possibly be coalesced by MCIP into 1 asserted IRQ, so @cpus can be + * "vectored" (multiple bits sets) as opposed to typical single bit + */ + do { + c = __ffs(cpu); /* 0,1,2,3 */ + __mcip_cmd(CMD_INTRPT_GENERATE_ACK, c); + cpu &= ~(1U << c); + } while (cpu); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + +#ifdef CONFIG_ARC_IPI_DBG + if (c != __ffs(copy)) + pr_info("IPIs from %x coalesced to %x\n", + copy, raw_smp_processor_id()); +#endif +} + +volatile int wake_flag; + +static void mcip_wakeup_cpu(int cpu, unsigned long pc) +{ + BUG_ON(cpu == 0); + wake_flag = cpu; +} + +void arc_platform_smp_wait_to_boot(int cpu) +{ + while (wake_flag != cpu) + ; + + wake_flag = 0; + __asm__ __volatile__("j @first_lines_of_secondary \n"); +} + +struct plat_smp_ops plat_smp_ops = { + .info = smp_cpuinfo_buf, + .cpu_kick = mcip_wakeup_cpu, + .ipi_send = mcip_ipi_send, + .ipi_clear = mcip_ipi_clear, +}; + +void mcip_init_early_smp(void) +{ +#define IS_AVAIL1(var, str) ((var) ? str : "") + + struct mcip_bcr { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad3:8, + idu:1, llm:1, num_cores:6, + iocoh:1, grtc:1, dbg:1, pad2:1, + msg:1, sem:1, ipi:1, pad:1, + ver:8; +#else + unsigned int ver:8, + pad:1, ipi:1, sem:1, msg:1, + pad2:1, dbg:1, grtc:1, iocoh:1, + num_cores:6, llm:1, idu:1, + pad3:8; +#endif + } mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + + sprintf(smp_cpuinfo_buf, + "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n", + mp.ver, mp.num_cores, + IS_AVAIL1(mp.ipi, "IPI "), + IS_AVAIL1(mp.idu, "IDU "), + IS_AVAIL1(mp.dbg, "DEBUG "), + IS_AVAIL1(mp.grtc, "GRTC")); + + idu_detected = mp.idu; + + if (mp.dbg) { + __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); + __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); + } + + if (IS_ENABLED(CONFIG_ARC_HAS_GRTC) && !mp.grtc) + panic("kernel trying to use non-existent GRTC\n"); +} + +/*************************************************************************** + * ARCv2 Interrupt Distribution Unit (IDU) + * + * Connects external "COMMON" IRQs to core intc, providing: + * -dynamic routing (IRQ affinity) + * -load balancing (Round Robin interrupt distribution) + * -1:N distribution + * + * It physically resides in the MCIP hw block + */ + +#include <linux/irqchip.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include "../../drivers/irqchip/irqchip.h" + +/* + * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core) + */ +static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask) +{ + __mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask); +} + +static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl, + unsigned int distr) +{ + union { + unsigned int word; + struct { + unsigned int distr:2, pad:2, lvl:1, pad2:27; + }; + } data; + + data.distr = distr; + data.lvl = lvl; + __mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word); +} + +static void idu_irq_mask(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void idu_irq_unmask(struct irq_data *data) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + __mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 0); + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static int +idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f) +{ + return IRQ_SET_MASK_OK; +} + +static struct irq_chip idu_irq_chip = { + .name = "MCIP IDU Intc", + .irq_mask = idu_irq_mask, + .irq_unmask = idu_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = idu_irq_set_affinity, +#endif + +}; + +static int idu_first_irq; + +static void idu_cascade_isr(unsigned int core_irq, struct irq_desc *desc) +{ + struct irq_domain *domain = irq_desc_get_handler_data(desc); + unsigned int idu_irq; + + idu_irq = core_irq - idu_first_irq; + generic_handle_irq(irq_find_mapping(domain, idu_irq)); +} + +static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq) +{ + irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq); + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); + + return 0; +} + +static int idu_irq_xlate(struct irq_domain *d, struct device_node *n, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) +{ + irq_hw_number_t hwirq = *out_hwirq = intspec[0]; + int distri = intspec[1]; + unsigned long flags; + + *out_type = IRQ_TYPE_NONE; + + /* XXX: validate distribution scheme again online cpu mask */ + if (distri == 0) { + /* 0 - Round Robin to all cpus, otherwise 1 bit per core */ + raw_spin_lock_irqsave(&mcip_lock, flags); + idu_set_dest(hwirq, BIT(num_online_cpus()) - 1); + idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR); + raw_spin_unlock_irqrestore(&mcip_lock, flags); + } else { + /* + * DEST based distribution for Level Triggered intr can only + * have 1 CPU, so generalize it to always contain 1 cpu + */ + int cpu = ffs(distri); + + if (cpu != fls(distri)) + pr_warn("IDU irq %lx distri mode set to cpu %x\n", + hwirq, cpu); + + raw_spin_lock_irqsave(&mcip_lock, flags); + idu_set_dest(hwirq, cpu); + idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST); + raw_spin_unlock_irqrestore(&mcip_lock, flags); + } + + return 0; +} + +static const struct irq_domain_ops idu_irq_ops = { + .xlate = idu_irq_xlate, + .map = idu_irq_map, +}; + +/* + * [16, 23]: Statically assigned always private-per-core (Timers, WDT, IPI) + * [24, 23+C]: If C > 0 then "C" common IRQs + * [24+C, N]: Not statically assigned, private-per-core + */ + + +static int __init +idu_of_init(struct device_node *intc, struct device_node *parent) +{ + struct irq_domain *domain; + /* Read IDU BCR to confirm nr_irqs */ + int nr_irqs = of_irq_count(intc); + int i, irq; + + if (!idu_detected) + panic("IDU not detected, but DeviceTree using it"); + + pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs); + + domain = irq_domain_add_linear(intc, nr_irqs, &idu_irq_ops, NULL); + + /* Parent interrupts (core-intc) are already mapped */ + + for (i = 0; i < nr_irqs; i++) { + /* + * Return parent uplink IRQs (towards core intc) 24,25,..... + * this step has been done before already + * however we need it to get the parent virq and set IDU handler + * as first level isr + */ + irq = irq_of_parse_and_map(intc, i); + if (!i) + idu_first_irq = irq; + + irq_set_handler_data(irq, domain); + irq_set_chained_handler(irq, idu_cascade_isr); + } + + __mcip_cmd(CMD_IDU_ENABLE, 0); + + return 0; +} +IRQCHIP_DECLARE(arcv2_idu_intc, "snps,archs-idu-intc", idu_of_init); diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index fd2ec50102f2..1287388c258a 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -266,10 +266,9 @@ static int arc_pmu_add(struct perf_event *event, int flags) static int arc_pmu_device_probe(struct platform_device *pdev) { - struct arc_pmu *arc_pmu; struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, j, ret; + int i, j; union cc_name { struct { @@ -336,9 +335,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) /* ARC 700 PMU does not support sampling events */ arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); - - return ret; + return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); } #ifdef CONFIG_OF diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e095c557afdd..44092456776f 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -44,7 +44,11 @@ SYSCALL_DEFINE0(arc_gettls) void arch_cpu_idle(void) { /* sleep, but enable all interrupts before committing */ - __asm__("sleep 0x3"); + if (is_isa_arcompact()) { + __asm__("sleep 0x3"); + } else { + __asm__("sleep 0x10"); + } } asmlinkage void ret_from_fork(void); @@ -166,8 +170,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp) * [L] ZOL loop inhibited to begin with - cleared by a LP insn * Interrupts enabled */ - regs->status32 = STATUS_U_MASK | STATUS_L_MASK | - STATUS_E1_MASK | STATUS_E2_MASK; + regs->status32 = STATUS_U_MASK | STATUS_L_MASK | ISA_INIT_STATUS_BITS; /* bogus seed values for debugging */ regs->lp_start = 0x10; @@ -197,8 +200,11 @@ int elf_check_arch(const struct elf32_hdr *x) { unsigned int eflags; - if (x->e_machine != EM_ARCOMPACT) + if (x->e_machine != EM_ARC_INUSE) { + pr_err("ELF not built for %s ISA\n", + is_isa_arcompact() ? "ARCompact":"ARCv2"); return 0; + } eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) { diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 13b3ffb27a38..4442204fe238 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -47,10 +47,47 @@ static int genregs_get(struct task_struct *target, offsetof(struct user_regs_struct, LOC) + 4); REG_O_ZERO(pad); - REG_O_CHUNK(scratch, callee, ptregs); + REG_O_ONE(scratch.bta, &ptregs->bta); + REG_O_ONE(scratch.lp_start, &ptregs->lp_start); + REG_O_ONE(scratch.lp_end, &ptregs->lp_end); + REG_O_ONE(scratch.lp_count, &ptregs->lp_count); + REG_O_ONE(scratch.status32, &ptregs->status32); + REG_O_ONE(scratch.ret, &ptregs->ret); + REG_O_ONE(scratch.blink, &ptregs->blink); + REG_O_ONE(scratch.fp, &ptregs->fp); + REG_O_ONE(scratch.gp, &ptregs->r26); + REG_O_ONE(scratch.r12, &ptregs->r12); + REG_O_ONE(scratch.r11, &ptregs->r11); + REG_O_ONE(scratch.r10, &ptregs->r10); + REG_O_ONE(scratch.r9, &ptregs->r9); + REG_O_ONE(scratch.r8, &ptregs->r8); + REG_O_ONE(scratch.r7, &ptregs->r7); + REG_O_ONE(scratch.r6, &ptregs->r6); + REG_O_ONE(scratch.r5, &ptregs->r5); + REG_O_ONE(scratch.r4, &ptregs->r4); + REG_O_ONE(scratch.r3, &ptregs->r3); + REG_O_ONE(scratch.r2, &ptregs->r2); + REG_O_ONE(scratch.r1, &ptregs->r1); + REG_O_ONE(scratch.r0, &ptregs->r0); + REG_O_ONE(scratch.sp, &ptregs->sp); + REG_O_ZERO(pad2); - REG_O_CHUNK(callee, efa, cregs); - REG_O_CHUNK(efa, stop_pc, &target->thread.fault_address); + + REG_O_ONE(callee.r25, &cregs->r25); + REG_O_ONE(callee.r24, &cregs->r24); + REG_O_ONE(callee.r23, &cregs->r23); + REG_O_ONE(callee.r22, &cregs->r22); + REG_O_ONE(callee.r21, &cregs->r21); + REG_O_ONE(callee.r20, &cregs->r20); + REG_O_ONE(callee.r19, &cregs->r19); + REG_O_ONE(callee.r18, &cregs->r18); + REG_O_ONE(callee.r17, &cregs->r17); + REG_O_ONE(callee.r16, &cregs->r16); + REG_O_ONE(callee.r15, &cregs->r15); + REG_O_ONE(callee.r14, &cregs->r14); + REG_O_ONE(callee.r13, &cregs->r13); + + REG_O_ONE(efa, &target->thread.fault_address); if (!ret) { if (in_brkpt_trap(ptregs)) { @@ -97,12 +134,51 @@ static int genregs_set(struct task_struct *target, offsetof(struct user_regs_struct, LOC) + 4); REG_IGNORE_ONE(pad); - /* TBD: disallow updates to STATUS32 etc*/ - REG_IN_CHUNK(scratch, pad2, ptregs); /* pt_regs[bta..sp] */ + + REG_IN_ONE(scratch.bta, &ptregs->bta); + REG_IN_ONE(scratch.lp_start, &ptregs->lp_start); + REG_IN_ONE(scratch.lp_end, &ptregs->lp_end); + REG_IN_ONE(scratch.lp_count, &ptregs->lp_count); + + REG_IGNORE_ONE(scratch.status32); + + REG_IN_ONE(scratch.ret, &ptregs->ret); + REG_IN_ONE(scratch.blink, &ptregs->blink); + REG_IN_ONE(scratch.fp, &ptregs->fp); + REG_IN_ONE(scratch.gp, &ptregs->r26); + REG_IN_ONE(scratch.r12, &ptregs->r12); + REG_IN_ONE(scratch.r11, &ptregs->r11); + REG_IN_ONE(scratch.r10, &ptregs->r10); + REG_IN_ONE(scratch.r9, &ptregs->r9); + REG_IN_ONE(scratch.r8, &ptregs->r8); + REG_IN_ONE(scratch.r7, &ptregs->r7); + REG_IN_ONE(scratch.r6, &ptregs->r6); + REG_IN_ONE(scratch.r5, &ptregs->r5); + REG_IN_ONE(scratch.r4, &ptregs->r4); + REG_IN_ONE(scratch.r3, &ptregs->r3); + REG_IN_ONE(scratch.r2, &ptregs->r2); + REG_IN_ONE(scratch.r1, &ptregs->r1); + REG_IN_ONE(scratch.r0, &ptregs->r0); + REG_IN_ONE(scratch.sp, &ptregs->sp); + REG_IGNORE_ONE(pad2); - REG_IN_CHUNK(callee, efa, cregs); /* callee_regs[r25..r13] */ + + REG_IN_ONE(callee.r25, &cregs->r25); + REG_IN_ONE(callee.r24, &cregs->r24); + REG_IN_ONE(callee.r23, &cregs->r23); + REG_IN_ONE(callee.r22, &cregs->r22); + REG_IN_ONE(callee.r21, &cregs->r21); + REG_IN_ONE(callee.r20, &cregs->r20); + REG_IN_ONE(callee.r19, &cregs->r19); + REG_IN_ONE(callee.r18, &cregs->r18); + REG_IN_ONE(callee.r17, &cregs->r17); + REG_IN_ONE(callee.r16, &cregs->r16); + REG_IN_ONE(callee.r15, &cregs->r15); + REG_IN_ONE(callee.r14, &cregs->r14); + REG_IN_ONE(callee.r13, &cregs->r13); + REG_IGNORE_ONE(efa); /* efa update invalid */ - REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ + REG_IGNORE_ONE(stop_pc); /* PC updated via @ret */ return ret; } @@ -124,7 +200,7 @@ static const struct user_regset arc_regsets[] = { static const struct user_regset_view user_arc_view = { .name = UTS_MACHINE, - .e_machine = EM_ARCOMPACT, + .e_machine = EM_ARC_INUSE, .regsets = arc_regsets, .n = ARRAY_SIZE(arc_regsets) }; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 1d167c6df8ca..a3d186211ed3 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -30,6 +30,8 @@ #define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x)) +unsigned int intr_to_DE_cnt; + /* Part of U-boot ABI: see head.S */ int __initdata uboot_tag; char __initdata *uboot_arg; @@ -54,7 +56,7 @@ static void read_arc_build_cfg_regs(void) cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); - cpu->uncached_base = uncached_space.start << 24; + BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE); READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); @@ -96,7 +98,7 @@ static void read_arc_build_cfg_regs(void) read_decode_mmu_bcr(); read_decode_cache_bcr(); - { + if (is_isa_arcompact()) { struct bcr_fp_arcompact sp, dp; struct bcr_bpu_arcompact bpu; @@ -112,6 +114,19 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.num_cache = 256 << (bpu.ent - 1); cpu->bpu.num_pred = 256 << (bpu.ent - 1); } + } else { + struct bcr_fp_arcv2 spdp; + struct bcr_bpu_arcv2 bpu; + + READ_BCR(ARC_REG_FP_V2_BCR, spdp); + cpu->extn.fpu_sp = spdp.sp ? 1 : 0; + cpu->extn.fpu_dp = spdp.dp ? 1 : 0; + + READ_BCR(ARC_REG_BPU_BCR, bpu); + cpu->bpu.ver = bpu.ver; + cpu->bpu.full = bpu.ft; + cpu->bpu.num_cache = 256 << bpu.bce; + cpu->bpu.num_pred = 2048 << bpu.pte; } READ_BCR(ARC_REG_AP_BCR, bcr); @@ -131,6 +146,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x30, "ARC 700" }, 0x33}, { {0x34, "ARC 700 R4.10"}, 0x34}, { {0x35, "ARC 700 R4.11"}, 0x35}, + { {0x50, "ARC HS38" }, 0x51}, { {0x00, NULL } } }; @@ -149,13 +165,17 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); - { + if (is_isa_arcompact()) { isa_nm = "ARCompact"; be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); atomic = cpu->isa.atomic1; if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + } else { + isa_nm = "ARCv2"; + be = cpu->isa.be; + atomic = cpu->isa.atomic; } n += scnprintf(buf + n, len - n, @@ -183,16 +203,34 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ", IS_AVAIL1(cpu->timers.t0, "Timer0 "), IS_AVAIL1(cpu->timers.t1, "Timer1 "), - IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC)); + IS_AVAIL2(cpu->timers.rtc, "64-bit RTC ", + CONFIG_ARC_HAS_RTC)); - n += i = scnprintf(buf + n, len - n, "%s%s", - IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC)); + n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", + IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), + IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); if (i) n += scnprintf(buf + n, len - n, "\n\t\t: "); + if (cpu->extn_mpy.ver) { + if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */ + n += scnprintf(buf + n, len - n, "mpy "); + } else { + int opt = 2; /* stock MPY/MPYH */ + + if (cpu->extn_mpy.dsp) /* OPT 7-9 */ + opt = cpu->extn_mpy.dsp + 6; + + n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); + } + n += scnprintf(buf + n, len - n, "%s", + IS_USED(CONFIG_ARC_HAS_HW_MPY)); + } + n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", - IS_AVAIL1(cpu->extn_mpy.ver, "mpy "), + IS_AVAIL1(cpu->isa.div_rem, "div_rem "), IS_AVAIL1(cpu->extn.norm, "norm "), IS_AVAIL1(cpu->extn.barrel, "barrel-shift "), IS_AVAIL1(cpu->extn.swap, "swap "), @@ -219,7 +257,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\nUncached Base\t: %#x\n", - cpu->vec_base, cpu->uncached_base); + cpu->vec_base, ARC_UNCACHED_ADDR_SPACE); if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", @@ -254,8 +292,8 @@ static void arc_chk_core_config(void) if (!cpu->timers.t1) panic("Timer1 is not present!\n"); - if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc) - panic("RTSC is not present\n"); + if (IS_ENABLED(CONFIG_ARC_HAS_RTC) && !cpu->timers.rtc) + panic("RTC is not present\n"); #ifdef CONFIG_ARC_HAS_DCCM /* @@ -323,13 +361,16 @@ static inline int is_kernel(unsigned long addr) void __init setup_arch(char **cmdline_p) { +#ifdef CONFIG_ARC_UBOOT_SUPPORT /* make sure that uboot passed pointer to cmdline/dtb is valid */ if (uboot_tag && is_kernel((unsigned long)uboot_arg)) panic("Invalid uboot arg\n"); /* See if u-boot passed an external Device Tree blob */ machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */ - if (!machine_desc) { + if (!machine_desc) +#endif + { /* No, so try the embedded one */ machine_desc = setup_machine_fdt(__dtb_start); if (!machine_desc) diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 2251fb4bbfd7..004b7f0bc76c 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -67,7 +67,33 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int err; - err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs, + struct user_regs_struct uregs; + + uregs.scratch.bta = regs->bta; + uregs.scratch.lp_start = regs->lp_start; + uregs.scratch.lp_end = regs->lp_end; + uregs.scratch.lp_count = regs->lp_count; + uregs.scratch.status32 = regs->status32; + uregs.scratch.ret = regs->ret; + uregs.scratch.blink = regs->blink; + uregs.scratch.fp = regs->fp; + uregs.scratch.gp = regs->r26; + uregs.scratch.r12 = regs->r12; + uregs.scratch.r11 = regs->r11; + uregs.scratch.r10 = regs->r10; + uregs.scratch.r9 = regs->r9; + uregs.scratch.r8 = regs->r8; + uregs.scratch.r7 = regs->r7; + uregs.scratch.r6 = regs->r6; + uregs.scratch.r5 = regs->r5; + uregs.scratch.r4 = regs->r4; + uregs.scratch.r3 = regs->r3; + uregs.scratch.r2 = regs->r2; + uregs.scratch.r1 = regs->r1; + uregs.scratch.r0 = regs->r0; + uregs.scratch.sp = regs->sp; + + err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch, sizeof(sf->uc.uc_mcontext.regs.scratch)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); @@ -78,14 +104,40 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) { sigset_t set; int err; + struct user_regs_struct uregs; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (!err) set_current_blocked(&set); - err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch), + err |= __copy_from_user(&uregs.scratch, + &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); + regs->bta = uregs.scratch.bta; + regs->lp_start = uregs.scratch.lp_start; + regs->lp_end = uregs.scratch.lp_end; + regs->lp_count = uregs.scratch.lp_count; + regs->status32 = uregs.scratch.status32; + regs->ret = uregs.scratch.ret; + regs->blink = uregs.scratch.blink; + regs->fp = uregs.scratch.fp; + regs->r26 = uregs.scratch.gp; + regs->r12 = uregs.scratch.r12; + regs->r11 = uregs.scratch.r11; + regs->r10 = uregs.scratch.r10; + regs->r9 = uregs.scratch.r9; + regs->r8 = uregs.scratch.r8; + regs->r7 = uregs.scratch.r7; + regs->r6 = uregs.scratch.r6; + regs->r5 = uregs.scratch.r5; + regs->r4 = uregs.scratch.r4; + regs->r3 = uregs.scratch.r3; + regs->r2 = uregs.scratch.r2; + regs->r1 = uregs.scratch.r1; + regs->r0 = uregs.scratch.r0; + regs->sp = uregs.scratch.sp; + return err; } @@ -284,7 +336,7 @@ static void arc_restart_syscall(struct k_sigaction *ka, struct pt_regs *regs) * their orig user space value when we ret from kernel */ regs->r0 = regs->orig_r0; - regs->ret -= 4; + regs->ret -= is_isa_arcv2() ? 2 : 4; break; } } @@ -325,10 +377,10 @@ void do_signal(struct pt_regs *regs) if (regs->r0 == -ERESTARTNOHAND || regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) { regs->r0 = regs->orig_r0; - regs->ret -= 4; + regs->ret -= is_isa_arcv2() ? 2 : 4; } else if (regs->r0 == -ERESTART_RESTARTBLOCK) { regs->r8 = __NR_restart_syscall; - regs->ret -= 4; + regs->ret -= is_isa_arcv2() ? 2 : 4; } syscall_wont_restart(regs); /* No more restarts */ } diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 6a400b1b0b62..be13d12420ba 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -31,7 +31,7 @@ arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; #endif -struct plat_smp_ops plat_smp_ops; +struct plat_smp_ops __weak plat_smp_ops; /* XXX: per cpu ? Only needed once in early seconday boot */ struct task_struct *secondary_idle_tsk; @@ -182,7 +182,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) /* * not supported here */ -int __init setup_profiling_timer(unsigned int multiplier) +int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } @@ -278,8 +278,10 @@ static void ipi_cpu_stop(void) machine_halt(); } -static inline void __do_IPI(unsigned long msg) +static inline int __do_IPI(unsigned long msg) { + int rc = 0; + switch (msg) { case IPI_RESCHEDULE: scheduler_ipi(); @@ -294,8 +296,10 @@ static inline void __do_IPI(unsigned long msg) break; default: - pr_warn("IPI with unexpected msg %ld\n", msg); + rc = 1; } + + return rc; } /* @@ -305,6 +309,7 @@ static inline void __do_IPI(unsigned long msg) irqreturn_t do_IPI(int irq, void *dev_id) { unsigned long pending; + unsigned long __maybe_unused copy; pr_debug("IPI [%ld] received on cpu %d\n", *this_cpu_ptr(&ipi_data), smp_processor_id()); @@ -316,11 +321,18 @@ irqreturn_t do_IPI(int irq, void *dev_id) * "dequeue" the msg corresponding to this IPI (and possibly other * piggybacked msg from elided IPIs: see ipi_send_msg_one() above) */ - pending = xchg(this_cpu_ptr(&ipi_data), 0); + copy = pending = xchg(this_cpu_ptr(&ipi_data), 0); do { unsigned long msg = __ffs(pending); - __do_IPI(msg); + int rc; + + rc = __do_IPI(msg); +#ifdef CONFIG_ARC_IPI_DBG + /* IPI received but no valid @msg */ + if (rc) + pr_info("IPI with bogus msg %ld in %ld\n", msg, copy); +#endif pending &= ~(1U << msg); } while (pending); diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 92320d6f737c..001de4ce711e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -122,19 +122,17 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, while (1) { address = UNW_PC(&frame_info); - if (address && __kernel_text_address(address)) { - if (consumer_fn(address, arg) == -1) - break; - } + if (!address || !__kernel_text_address(address)) + break; - ret = arc_unwind(&frame_info); + if (consumer_fn(address, arg) == -1) + break; - if (ret == 0) { - frame_info.regs.r63 = frame_info.regs.r31; - continue; - } else { + ret = arc_unwind(&frame_info); + if (ret) break; - } + + frame_info.regs.r63 = frame_info.regs.r31; } return address; /* return the last address it saw */ diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index dbe74f418019..3364d2bbc515 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -26,6 +26,7 @@ * while TIMER1 for free running (clocksource) * * Newer ARC700 cores have 64bit clk fetching RTSC insn, preferred over TIMER1 + * which however is currently broken */ #include <linux/spinlock.h> @@ -44,6 +45,8 @@ #include <asm/clk.h> #include <asm/mach_desc.h> +#include <asm/mcip.h> + /* Timer related Aux registers */ #define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */ #define ARC_REG_TIMER0_CTRL 0x22 /* timer 0 control */ @@ -59,14 +62,10 @@ /********** Clock Source Device *********/ -#ifdef CONFIG_ARC_HAS_RTSC +#ifdef CONFIG_ARC_HAS_GRTC -int arc_counter_setup(void) +static int arc_counter_setup(void) { - /* - * For SMP this needs to be 0. However Kconfig glue doesn't - * enable this option for SMP configs - */ return 1; } @@ -75,45 +74,84 @@ static cycle_t arc_counter_read(struct clocksource *cs) unsigned long flags; union { #ifdef CONFIG_CPU_BIG_ENDIAN - struct { u32 high, low; }; + struct { u32 h, l; }; #else - struct { u32 low, high; }; + struct { u32 l, h; }; #endif cycle_t full; } stamp; - flags = arch_local_irq_save(); + local_irq_save(flags); - __asm__ __volatile( - " .extCoreRegister tsch, 58, r, cannot_shortcut \n" - " rtsc %0, 0 \n" - " mov %1, 0 \n" - : "=r" (stamp.low), "=r" (stamp.high)); + __mcip_cmd(CMD_GRTC_READ_LO, 0); + stamp.l = read_aux_reg(ARC_REG_MCIP_READBACK); + + __mcip_cmd(CMD_GRTC_READ_HI, 0); + stamp.h = read_aux_reg(ARC_REG_MCIP_READBACK); - arch_local_irq_restore(flags); + local_irq_restore(flags); return stamp.full; } static struct clocksource arc_counter = { - .name = "ARC RTSC", - .rating = 300, + .name = "ARConnect GRTC", + .rating = 400, .read = arc_counter_read, - .mask = CLOCKSOURCE_MASK(32), + .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -#else /* !CONFIG_ARC_HAS_RTSC */ +#else + +#ifdef CONFIG_ARC_HAS_RTC + +#define AUX_RTC_CTRL 0x103 +#define AUX_RTC_LOW 0x104 +#define AUX_RTC_HIGH 0x105 -static bool is_usable_as_clocksource(void) +int arc_counter_setup(void) { -#ifdef CONFIG_SMP - return 0; + write_aux_reg(AUX_RTC_CTRL, 1); + + /* Not usable in SMP */ + return !IS_ENABLED(CONFIG_SMP); +} + +static cycle_t arc_counter_read(struct clocksource *cs) +{ + unsigned long status; + union { +#ifdef CONFIG_CPU_BIG_ENDIAN + struct { u32 high, low; }; #else - return 1; + struct { u32 low, high; }; #endif + cycle_t full; + } stamp; + + + __asm__ __volatile( + "1: \n" + " lr %0, [AUX_RTC_LOW] \n" + " lr %1, [AUX_RTC_HIGH] \n" + " lr %2, [AUX_RTC_CTRL] \n" + " bbit0.nt %2, 31, 1b \n" + : "=r" (stamp.low), "=r" (stamp.high), "=r" (status)); + + return stamp.full; } +static struct clocksource arc_counter = { + .name = "ARCv2 RTC", + .rating = 350, + .read = arc_counter_read, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +#else /* !CONFIG_ARC_HAS_RTC */ + /* * set 32bit TIMER1 to keep counting monotonically and wraparound */ @@ -123,7 +161,8 @@ int arc_counter_setup(void) write_aux_reg(ARC_REG_TIMER1_CNT, 0); write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH); - return is_usable_as_clocksource(); + /* Not usable in SMP */ + return !IS_ENABLED(CONFIG_SMP); } static cycle_t arc_counter_read(struct clocksource *cs) @@ -140,6 +179,7 @@ static struct clocksource arc_counter = { }; #endif +#endif /********** Clock Event Device *********/ diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index e00a01879025..e0cf99893212 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -14,6 +14,7 @@ #include <linux/proc_fs.h> #include <linux/file.h> #include <asm/arcregs.h> +#include <asm/irqflags.h> /* * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25) @@ -34,7 +35,10 @@ static noinline void print_reg_file(long *reg_rev, int start_num) n += scnprintf(buf + n, len - n, "\n"); /* because pt_regs has regs reversed: r12..r0, r25..r13 */ - reg_rev--; + if (is_isa_arcv2() && start_num == 0) + reg_rev++; + else + reg_rev--; } if (start_num != 0) @@ -152,6 +156,15 @@ static void show_ecr_verbose(struct pt_regs *regs) ((cause_code == 0x02) ? "Write" : "EX")); } else if (vec == ECR_V_INSN_ERR) { pr_cont("Illegal Insn\n"); +#ifdef CONFIG_ISA_ARCV2 + } else if (vec == ECR_V_MEM_ERR) { + if (cause_code == 0x00) + pr_cont("Bus Error from Insn Mem\n"); + else if (cause_code == 0x10) + pr_cont("Bus Error from Data Mem\n"); + else + pr_cont("Bus Error, check PRM\n"); +#endif } else { pr_cont("Check Programmer's Manual\n"); } @@ -185,12 +198,20 @@ void show_regs(struct pt_regs *regs) pr_info("[STAT32]: 0x%08lx", regs->status32); -#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit : "" - if (!user_mode(regs)) - pr_cont(" : %2s %2s %2s %2s %2s\n", - STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1), - STS_BIT(regs, E2), STS_BIT(regs, E1)); +#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : "" +#ifdef CONFIG_ISA_ARCOMPACT + pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n", + (regs->status32 & STATUS_U_MASK) ? "U " : "K ", + STS_BIT(regs, DE), STS_BIT(regs, AE), + STS_BIT(regs, A2), STS_BIT(regs, A1), + STS_BIT(regs, E2), STS_BIT(regs, E1)); +#else + pr_cont(" : %2s%2s%2s%2s\n", + STS_BIT(regs, IE), + (regs->status32 & STATUS_U_MASK) ? "U " : "K ", + STS_BIT(regs, DE), STS_BIT(regs, AE)); +#endif pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n", regs->bta, regs->sp, regs->fp); pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile index db46e200baba..b1656d156097 100644 --- a/arch/arc/lib/Makefile +++ b/arch/arc/lib/Makefile @@ -5,5 +5,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -lib-y := strchr-700.o strcmp.o strcpy-700.o strlen.o -lib-y += memcmp.o memcpy-700.o memset.o +lib-y := strchr-700.o strcpy-700.o strlen.o memcmp.o + +lib-$(CONFIG_ISA_ARCOMPACT) += memcpy-700.o memset.o strcmp.o +lib-$(CONFIG_ISA_ARCV2) += memcpy-archs.o memset-archs.o strcmp-archs.o diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S index 978bf8314dfb..a4015e7d9ab7 100644 --- a/arch/arc/lib/memcmp.S +++ b/arch/arc/lib/memcmp.S @@ -24,14 +24,32 @@ ENTRY(memcmp) ld r4,[r0,0] ld r5,[r1,0] lsr.f lp_count,r3,3 +#ifdef CONFIG_ISA_ARCV2 + /* In ARCv2 a branch can't be the last instruction in a zero overhead + * loop. + * So we move the branch to the start of the loop, duplicate it + * after the end, and set up r12 so that the branch isn't taken + * initially. + */ + mov_s r12,WORD2 + lpne .Loop_end + brne WORD2,r12,.Lodd + ld WORD2,[r0,4] +#else lpne .Loop_end ld_s WORD2,[r0,4] +#endif ld_s r12,[r1,4] brne r4,r5,.Leven ld.a r4,[r0,8] ld.a r5,[r1,8] +#ifdef CONFIG_ISA_ARCV2 +.Loop_end: + brne WORD2,r12,.Lodd +#else brne WORD2,r12,.Lodd .Loop_end: +#endif asl_s SHIFT,SHIFT,3 bhs_s .Last_cmp brne r4,r5,.Leven @@ -89,7 +107,6 @@ ENTRY(memcmp) bset.cs r0,r0,31 .Lodd: cmp_s WORD2,r12 - mov_s r0,1 j_s.d [blink] bset.cs r0,r0,31 @@ -100,14 +117,25 @@ ENTRY(memcmp) ldb r4,[r0,0] ldb r5,[r1,0] lsr.f lp_count,r3 +#ifdef CONFIG_ISA_ARCV2 + mov r12,r3 lpne .Lbyte_end + brne r3,r12,.Lbyte_odd +#else + lpne .Lbyte_end +#endif ldb_s r3,[r0,1] ldb r12,[r1,1] brne r4,r5,.Lbyte_even ldb.a r4,[r0,2] ldb.a r5,[r1,2] +#ifdef CONFIG_ISA_ARCV2 +.Lbyte_end: + brne r3,r12,.Lbyte_odd +#else brne r3,r12,.Lbyte_odd .Lbyte_end: +#endif bcc .Lbyte_even brne r4,r5,.Lbyte_even ldb_s r3,[r0,1] diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S new file mode 100644 index 000000000000..1b2b3acfed52 --- /dev/null +++ b/arch/arc/lib/memcpy-archs.S @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#ifdef __LITTLE_ENDIAN__ +# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << +# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> +# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM +# define MERGE_2(RX,RY,IMM) +# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF +# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM +#else +# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> +# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << +# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << +# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << +# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM +# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 +#endif + +#ifdef CONFIG_ARC_HAS_LL64 +# define PREFETCH_READ(RX) prefetch [RX, 56] +# define PREFETCH_WRITE(RX) prefetchw [RX, 64] +# define LOADX(DST,RX) ldd.ab DST, [RX, 8] +# define STOREX(SRC,RX) std.ab SRC, [RX, 8] +# define ZOLSHFT 5 +# define ZOLAND 0x1F +#else +# define PREFETCH_READ(RX) prefetch [RX, 28] +# define PREFETCH_WRITE(RX) prefetchw [RX, 32] +# define LOADX(DST,RX) ld.ab DST, [RX, 4] +# define STOREX(SRC,RX) st.ab SRC, [RX, 4] +# define ZOLSHFT 4 +# define ZOLAND 0xF +#endif + +ENTRY(memcpy) + prefetch [r1] ; Prefetch the read location + prefetchw [r0] ; Prefetch the write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don;t clobber ret val + +;;; if size <= 8 + cmp r2, 8 + bls.d @smallchunk + mov.f lp_count, r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @aligndestination + ;; LOOP BEGIN + ldb.ab r5, [r1,1] + sub r2, r2, 1 + stb.ab r5, [r3,1] +aligndestination: + +;;; Check the alignment of the source + and.f r4, r1, 0x03 + bnz.d @sourceunaligned + +;;; CASE 0: Both source and destination are 32bit aligned +;;; Convert len to Dwords, unfold x4 + lsr.f lp_count, r2, ZOLSHFT + lpnz @copy32_64bytes + ;; LOOP START + LOADX (r6, r1) + PREFETCH_READ (r1) + PREFETCH_WRITE (r3) + LOADX (r8, r1) + LOADX (r10, r1) + LOADX (r4, r1) + STOREX (r6, r3) + STOREX (r8, r3) + STOREX (r10, r3) + STOREX (r4, r3) +copy32_64bytes: + + and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes +smallchunk: + lpnz @copyremainingbytes + ;; LOOP START + ldb.ab r5, [r1,1] + stb.ab r5, [r3,1] +copyremainingbytes: + + j [blink] +;;; END CASE 0 + +sourceunaligned: + cmp r4, 2 + beq.d @unalignedOffby2 + sub r2, r2, 1 + + bhi.d @unalignedOffby3 + ldb.ab r5, [r1, 1] + +;;; CASE 1: The source is unaligned, off by 1 + ;; Hence I need to read 1 byte for a 16bit alignment + ;; and 2bytes to reach 32bit alignment + ldh.ab r6, [r1, 2] + sub r2, r2, 2 + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 + MERGE_1 (r6, r6, 8) + MERGE_2 (r5, r5, 24) + or r5, r5, r6 + + ;; Both src and dst are aligned + lpnz @copy8bytes_1 + ;; LOOP START + ld.ab r6, [r1, 4] + prefetch [r1, 28] ;Prefetch the next read location + ld.ab r8, [r1,4] + prefetchw [r3, 32] ;Prefetch the next write location + + SHIFT_1 (r7, r6, 24) + or r7, r7, r5 + SHIFT_2 (r5, r6, 8) + + SHIFT_1 (r9, r8, 24) + or r9, r9, r5 + SHIFT_2 (r5, r8, 8) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +copy8bytes_1: + + ;; Write back the remaining 16bits + EXTRACT_1 (r6, r5, 16) + sth.ab r6, [r3, 2] + ;; Write back the remaining 8bits + EXTRACT_2 (r5, r5, 16) + stb.ab r5, [r3, 1] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @copybytewise_1 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +copybytewise_1: + j [blink] + +unalignedOffby2: +;;; CASE 2: The source is unaligned, off by 2 + ldh.ab r5, [r1, 2] + sub r2, r2, 1 + + ;; Both src and dst are aligned + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ + asl.nz r5, r5, 16 +#endif + lpnz @copy8bytes_2 + ;; LOOP START + ld.ab r6, [r1, 4] + prefetch [r1, 28] ;Prefetch the next read location + ld.ab r8, [r1,4] + prefetchw [r3, 32] ;Prefetch the next write location + + SHIFT_1 (r7, r6, 16) + or r7, r7, r5 + SHIFT_2 (r5, r6, 16) + + SHIFT_1 (r9, r8, 16) + or r9, r9, r5 + SHIFT_2 (r5, r8, 16) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +copy8bytes_2: + +#ifdef __BIG_ENDIAN__ + lsr.nz r5, r5, 16 +#endif + sth.ab r5, [r3, 2] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @copybytewise_2 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +copybytewise_2: + j [blink] + +unalignedOffby3: +;;; CASE 3: The source is unaligned, off by 3 +;;; Hence, I need to read 1byte for achieve the 32bit alignment + + ;; Both src and dst are aligned + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ + asl.ne r5, r5, 24 +#endif + lpnz @copy8bytes_3 + ;; LOOP START + ld.ab r6, [r1, 4] + prefetch [r1, 28] ;Prefetch the next read location + ld.ab r8, [r1,4] + prefetch [r3, 32] ;Prefetch the next write location + + SHIFT_1 (r7, r6, 8) + or r7, r7, r5 + SHIFT_2 (r5, r6, 24) + + SHIFT_1 (r9, r8, 8) + or r9, r9, r5 + SHIFT_2 (r5, r8, 24) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +copy8bytes_3: + +#ifdef __BIG_ENDIAN__ + lsr.nz r5, r5, 24 +#endif + stb.ab r5, [r3, 1] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @copybytewise_3 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +copybytewise_3: + j [blink] + +END(memcpy) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S new file mode 100644 index 000000000000..92d573c734b5 --- /dev/null +++ b/arch/arc/lib/memset-archs.S @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#undef PREALLOC_NOT_AVAIL + +#ifdef PREALLOC_NOT_AVAIL +#define PREWRITE(A,B) prefetchw [(A),(B)] +#else +#define PREWRITE(A,B) prealloc [(A),(B)] +#endif + +ENTRY(memset) + prefetchw [r0] ; Prefetch the write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don't clobber ret val + +;;; if length < 8 + brls.d.nt r2, 8, .Lsmallchunk + mov.f lp_count,r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + stb.ab r1, [r3,1] + sub r2, r2, 1 +.Laligndestination: + +;;; Destination is aligned + and r1, r1, 0xFF + asl r4, r1, 8 + or r4, r4, r1 + asl r5, r4, 16 + or r5, r5, r4 + mov r4, r5 + + sub3 lp_count, r2, 8 + cmp r2, 64 + bmsk.hi r2, r2, 5 + mov.ls lp_count, 0 + add3.hi r2, r2, 8 + +;;; Convert len to Dwords, unfold x8 + lsr.f lp_count, lp_count, 6 + lpnz @.Lset64bytes + ;; LOOP START + PREWRITE(r3, 64) ;Prefetch the next write location + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +.Lset64bytes: + + lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes + lpnz .Lset32bytes + ;; LOOP START + prefetchw [r3, 32] ;Prefetch the next write location + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +.Lset32bytes: + + and.f lp_count, r2, 0x1F ;Last remaining 31 bytes +.Lsmallchunk: + lpnz .Lcopy3bytes + ;; LOOP START + stb.ab r1, [r3, 1] +.Lcopy3bytes: + + j [blink] + +END(memset) + +ENTRY(memzero) + ; adjust bzero args to memset args + mov r2, r1 + b.d memset ;tail call so need to tinker with blink + mov r1, 0 +END(memzero) diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S new file mode 100644 index 000000000000..4f338eec3365 --- /dev/null +++ b/arch/arc/lib/strcmp-archs.S @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +ENTRY(strcmp) + or r2, r0, r1 + bmsk_s r2, r2, 1 + brne r2, 0, @.Lcharloop + +;;; s1 and s2 are word aligned + ld.ab r2, [r0, 4] + + mov_s r12, 0x01010101 + ror r11, r12 + .align 4 +.LwordLoop: + ld.ab r3, [r1, 4] + ;; Detect NULL char in str1 + sub r4, r2, r12 + ld.ab r5, [r0, 4] + bic r4, r4, r2 + and r4, r4, r11 + brne.d.nt r4, 0, .LfoundNULL + ;; Check if the read locations are the same + cmp r2, r3 + beq.d .LwordLoop + mov.eq r2, r5 + + ;; A match is found, spot it out +#ifdef __LITTLE_ENDIAN__ + swape r3, r3 + mov_s r0, 1 + swape r2, r2 +#else + mov_s r0, 1 +#endif + cmp_s r2, r3 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.LfoundNULL: +#ifdef __BIG_ENDIAN__ + swape r4, r4 + swape r2, r2 + swape r3, r3 +#endif + ;; Find null byte + ffs r0, r4 + bmsk r2, r2, r0 + bmsk r3, r3, r0 + swape r2, r2 + swape r3, r3 + ;; make the return value + sub.f r0, r2, r3 + mov.hi r0, 1 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.Lcharloop: + ldb.ab r2, [r0, 1] + ldb.ab r3, [r1, 1] + nop + breq r2, 0, .Lcmpend + breq r2, r3, .Lcharloop + + .align 4 +.Lcmpend: + j_s.d [blink] + sub r0, r2, r3 +END(strcmp) diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index ac95cc239c1e..7beb941556c3 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -7,4 +7,4 @@ # obj-y := extable.o ioremap.o dma.o fault.o init.o -obj-y += tlb.o tlbex.o cache_arc700.o mmap.o +obj-y += tlb.o tlbex.o cache.o mmap.o diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache.c index 12b2100db073..b29d62ed4f7e 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache.c @@ -1,64 +1,12 @@ /* - * ARC700 VIPT Cache Management + * ARC Cache Management * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs - * -flush_cache_dup_mm (fork) - * -likewise for flush_cache_mm (exit/execve) - * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) - * - * vineetg: Apr 2011 - * -Now that MMU can support larger pg sz (16K), the determiniation of - * aliasing shd not be based on assumption of 8k pg - * - * vineetg: Mar 2011 - * -optimised version of flush_icache_range( ) for making I/D coherent - * when vaddr is available (agnostic of num of aliases) - * - * vineetg: Mar 2011 - * -Added documentation about I-cache aliasing on ARC700 and the way it - * was handled up until MMU V2. - * -Spotted a three year old bug when killing the 4 aliases, which needs - * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} - * instead of paddr | {0x00, 0x01, 0x10, 0x11} - * (Rajesh you owe me one now) - * - * vineetg: Dec 2010 - * -Off-by-one error when computing num_of_lines to flush - * This broke signal handling with bionic which uses synthetic sigret stub - * - * vineetg: Mar 2010 - * -GCC can't generate ZOL for core cache flush loops. - * Conv them into iterations based as opposed to while (start < end) types - * - * Vineetg: July 2009 - * -In I-cache flush routine we used to chk for aliasing for every line INV. - * Instead now we setup routines per cache geometry and invoke them - * via function pointers. - * - * Vineetg: Jan 2009 - * -Cache Line flush routines used to flush an extra line beyond end addr - * because check was while (end >= start) instead of (end > start) - * =Some call sites had to work around by doing -1, -4 etc to end param - * =Some callers didnt care. This was spec bad in case of INV routines - * which would discard valid data (cause of the horrible ext2 bug - * in ARC IDE driver) - * - * vineetg: June 11th 2008: Fixed flush_icache_range( ) - * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need - * to be flushed, which it was not doing. - * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, - * however ARC cache maintenance OPs require PHY addr. Thus need to do - * vmalloc_to_phy. - * -Also added optimisation there, that for range > PAGE SIZE we flush the - * entire cache in one shot rather than line by line. For e.g. a module - * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), - * while cache is only 16 or 32k. */ #include <linux/module.h> @@ -73,9 +21,15 @@ #include <asm/cachectl.h> #include <asm/setup.h> +static int l2_line_sz; + +void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop); + char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; + struct cpuinfo_arc_cache *p; #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ @@ -91,6 +45,11 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + p = &cpuinfo_arc700[c].slc; + if (p->ver) + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len); + return buf; } @@ -101,7 +60,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) */ void read_decode_cache_bcr(void) { - struct cpuinfo_arc_cache *p_ic, *p_dc; + struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc; unsigned int cpu = smp_processor_id(); struct bcr_cache { #ifdef CONFIG_CPU_BIG_ENDIAN @@ -111,14 +70,29 @@ void read_decode_cache_bcr(void) #endif } ibcr, dbcr; + struct bcr_generic sbcr; + + struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } slc_cfg; + p_ic = &cpuinfo_arc700[cpu].icache; READ_BCR(ARC_REG_IC_BCR, ibcr); if (!ibcr.ver) goto dc_chk; - BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ + if (ibcr.ver <= 3) { + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + } else if (ibcr.ver >= 4) { + p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + } + p_ic->line_len = 8 << ibcr.line_len; p_ic->sz_k = 1 << (ibcr.sz - 1); p_ic->ver = ibcr.ver; @@ -130,94 +104,140 @@ dc_chk: READ_BCR(ARC_REG_DC_BCR, dbcr); if (!dbcr.ver) - return; + goto slc_chk; + + if (dbcr.ver <= 3) { + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + } else if (dbcr.ver >= 4) { + p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->vipt = 0; + p_dc->alias = 0; /* PIPT so can't VIPT alias */ + } - BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ p_dc->line_len = 16 << dbcr.line_len; p_dc->sz_k = 1 << (dbcr.sz - 1); p_dc->ver = dbcr.ver; - p_dc->vipt = 1; - p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + +slc_chk: + if (!is_isa_arcv2()) + return; + + p_slc = &cpuinfo_arc700[cpu].slc; + READ_BCR(ARC_REG_SLC_BCR, sbcr); + if (sbcr.ver) { + READ_BCR(ARC_REG_SLC_CFG, slc_cfg); + p_slc->ver = sbcr.ver; + p_slc->sz_k = 128 << slc_cfg.sz; + l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + } } /* - * 1. Validate the Cache Geomtery (compile time config matches hardware) - * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) - * (aliasing D-cache configurations are not supported YET) - * 3. Enable the Caches, setup default flush mode for D-Cache - * 3. Calculate the SHMLBA used by user space + * Line Operation on {I,D}-Cache */ -void arc_cache_init(void) -{ - unsigned int __maybe_unused cpu = smp_processor_id(); - char str[256]; - - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 - if (!ic->ver) - panic("cache support enabled but non-existent cache\n"); +/* + * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) + * + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. + * + * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, + * paddr alone could not be used to correctly index the cache. + * + * ------------------ + * MMU v1/v2 (Fixed Page Size 8k) + * ------------------ + * The solution was to provide CDU with these additonal vaddr bits. These + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. + * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits + * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the + * orig 5 bits of paddr were anyways ignored by CDU line ops, as they + * represent the offset within cache-line. The adv of using this "clumsy" + * interface for additional info was no new reg was needed in CDU programming + * model. + * + * 17:13 represented the max num of bits passable, actual bits needed were + * fewer, based on the num-of-aliases possible. + * -for 2 alias possibility, only bit 13 needed (32K cache) + * -for 4 alias possibility, bits 14:13 needed (64K cache) + * + * ------------------ + * MMU v3 + * ------------------ + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. + * However from hardware perspective, smaller page sizes aggrevate aliasing + * meaning more vaddr bits needed to disambiguate the cache-line-op ; + * the existing scheme of piggybacking won't work for certain configurations. + * Two new registers IC_PTAG and DC_PTAG inttoduced. + * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + */ - if (ic->line_len != L1_CACHE_BYTES) - panic("ICache line [%d] != kernel Config [%d]", - ic->line_len, L1_CACHE_BYTES); +static inline +void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int op) +{ + unsigned int aux_cmd; + int num_lines; + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver [%d] doesn't match MMU ver [%d]\n", - ic->ver, CONFIG_ARC_MMU_VER); + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; } - if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int handled; - - if (!dc->ver) - panic("cache support enabled but non-existent cache\n"); + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } - if (dc->line_len != L1_CACHE_BYTES) - panic("DCache line [%d] != kernel Config [%d]", - dc->line_len, L1_CACHE_BYTES); + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); - /* check for D-Cache aliasing */ - handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; } } -#define OP_INV 0x1 -#define OP_FLUSH 0x2 -#define OP_FLUSH_N_INV 0x3 -#define OP_INV_IC 0x4 - -/* - * Common Helper for Line Operations on {I,D}-Cache - */ -static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) +static inline +void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int op) { unsigned int aux_cmd, aux_tag; int num_lines; - const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; - if (cacheop == OP_INV_IC) { + if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; -#if (CONFIG_ARC_MMU_VER > 2) aux_tag = ARC_REG_IC_PTAG; -#endif - } - else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; -#if (CONFIG_ARC_MMU_VER > 2) + } else { + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; aux_tag = ARC_REG_DC_PTAG; -#endif } /* Ensure we properly floor/ceil the non-line aligned/sized requests @@ -226,177 +246,169 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, * -@paddr will be cache-line aligned already (being page aligned) * -@sz will be integral multiple of line size (being page sized). */ - if (!full_page_op) { + if (!full_page) { sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; vaddr &= CACHE_LINE_MASK; } - num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); -#if (CONFIG_ARC_MMU_VER <= 2) - /* MMUv2 and before: paddr contains stuffed vaddrs bits */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#else - /* if V-P const for loop, PTAG can be written once outside loop */ - if (full_page_op) + /* + * MMUv3, cache ops require paddr in PTAG reg + * if V-P const for loop, PTAG can be written once outside loop + */ + if (full_page) write_aux_reg(aux_tag, paddr); -#endif while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* MMUv3, cache ops require paddr seperately */ - if (!full_page_op) { + if (!full_page) { write_aux_reg(aux_tag, paddr); paddr += L1_CACHE_BYTES; } write_aux_reg(aux_cmd, vaddr); vaddr += L1_CACHE_BYTES; -#else + } +} + +/* + * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache + * maintenance ops (in IVIL reg), as long as icache doesn't alias. + * + * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is + * specified in PTAG (similar to MMU v3) + */ +static inline +void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + while (num_lines-- > 0) { write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; -#endif } } +#if (CONFIG_ARC_MMU_VER < 3) +#define __cache_line_loop __cache_line_loop_v2 +#elif (CONFIG_ARC_MMU_VER == 3) +#define __cache_line_loop __cache_line_loop_v3 +#elif (CONFIG_ARC_MMU_VER > 3) +#define __cache_line_loop __cache_line_loop_v4 +#endif + #ifdef CONFIG_ARC_HAS_DCACHE /*************************************************************** * Machine specific helpers for Entire D-Cache or Per Line ops */ -static inline unsigned int __before_dc_op(const int op) +static inline void __before_dc_op(const int op) { - unsigned int reg = reg; - if (op == OP_FLUSH_N_INV) { /* Dcache provides 2 cmd: FLUSH or INV * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE * flush-n-inv is achieved by INV cmd but with IM=1 * So toggle INV sub-mode depending on op request and default */ - reg = read_aux_reg(ARC_REG_DC_CTRL); - write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH) - ; + const unsigned int ctl = ARC_REG_DC_CTRL; + write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH); } - - return reg; } -static inline void __after_dc_op(const int op, unsigned int reg) +static inline void __after_dc_op(const int op) { - if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ - while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); + if (op & OP_FLUSH) { + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int reg; - /* Switch back to default Invalidate mode */ - if (op == OP_FLUSH_N_INV) - write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); + /* flush / flush-n-inv both wait */ + while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS) + ; + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH); + } } /* * Operation on Entire D-Cache - * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} * Note that constant propagation ensures all the checks are gone * in generated code */ -static inline void __dc_entire_op(const int cacheop) +static inline void __dc_entire_op(const int op) { - unsigned int ctrl_reg; int aux; - ctrl_reg = __before_dc_op(cacheop); + __before_dc_op(op); - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_REG_DC_IVDC; else aux = ARC_REG_DC_FLSH; write_aux_reg(aux, 0x1); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(op); } /* For kernel mappings cache operation: index is same as paddr */ #define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) /* - * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) + * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) */ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) + unsigned long sz, const int op) { unsigned long flags; - unsigned int ctrl_reg; local_irq_save(flags); - ctrl_reg = __before_dc_op(cacheop); + __before_dc_op(op); - __cache_line_loop(paddr, vaddr, sz, cacheop); + __cache_line_loop(paddr, vaddr, sz, op); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(op); local_irq_restore(flags); } #else -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, vaddr, sz, cacheop) -#define __dc_line_op_k(paddr, sz, cacheop) +#define __dc_entire_op(op) +#define __dc_line_op(paddr, vaddr, sz, op) +#define __dc_line_op_k(paddr, sz, op) #endif /* CONFIG_ARC_HAS_DCACHE */ - #ifdef CONFIG_ARC_HAS_ICACHE -/* - * I-Cache Aliasing in ARC700 VIPT caches - * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. - * - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, - * paddr alone could not be used to correctly index the cache. - * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. - * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. - * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs - */ - -/*********************************************************** - * Machine specific helper for per line I-Cache invalidate. - */ - static inline void __ic_entire_inv(void) { write_aux_reg(ARC_REG_IC_IVIC, 1); @@ -410,7 +422,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, unsigned long flags; local_irq_save(flags); - __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } @@ -453,6 +465,53 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ +noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + unsigned long flags; + unsigned int ctrl; + + local_irq_save(flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); + write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + local_irq_restore(flags); +#endif +} + +static inline int need_slc_flush(void) +{ + return is_isa_arcv2() && l2_line_sz; +} /*********************************************************** * Exported APIs @@ -493,7 +552,7 @@ void flush_dcache_page(struct page *page) } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ - void *paddr = page_address(page); + unsigned long paddr = (unsigned long)page_address(page); unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) @@ -502,22 +561,30 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); - void dma_cache_wback_inv(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH_N_INV); + + if (need_slc_flush()) + slc_op(start, sz, OP_FLUSH_N_INV); } EXPORT_SYMBOL(dma_cache_wback_inv); void dma_cache_inv(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_INV); + + if (need_slc_flush()) + slc_op(start, sz, OP_INV); } EXPORT_SYMBOL(dma_cache_inv); void dma_cache_wback(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH); + + if (need_slc_flush()) + slc_op(start, sz, OP_FLUSH); } EXPORT_SYMBOL(dma_cache_wback); @@ -605,7 +672,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) * wrapper to clearout kernel or userspace mappings of a page * For kernel mappings @vaddr == @paddr */ -void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) +void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) { __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } @@ -637,7 +704,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, u_vaddr &= PAGE_MASK; - ___flush_dcache_page(paddr, u_vaddr); + __flush_dcache_page(paddr, u_vaddr); if (vma->vm_flags & VM_EXEC) __inv_icache_page(paddr, u_vaddr); @@ -663,8 +730,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { - void *kfrom = page_address(from); - void *kto = page_address(to); + unsigned long kfrom = (unsigned long)page_address(from); + unsigned long kto = (unsigned long)page_address(to); int clean_src_k_mappings = 0; /* @@ -680,7 +747,7 @@ void copy_user_highpage(struct page *to, struct page *from, clean_src_k_mappings = 1; } - copy_page(kto, kfrom); + copy_page((void *)kto, (void *)kfrom); /* * Mark DST page K-mapping as dirty for a later finalization by @@ -721,3 +788,56 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) flush_cache_all(); return 0; } + +void arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->ver) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + + /* + * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just as in MMU v3 + */ + if (is_isa_arcv2() && ic->alias) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + + if (!dc->ver) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact()) { + int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } + } +} diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 12cc6485b218..74a637a1cfc4 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -14,8 +14,6 @@ * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. - * A platform/device can make it zero based, by over-riding - * plat_{dma,kernel}_addr_to_{kernel,dma} */ #include <linux/dma-mapping.h> @@ -37,7 +35,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size, return NULL; /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + *dma_handle = (dma_addr_t)paddr; return paddr; } @@ -46,8 +44,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent); void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + free_pages_exact((void *)dma_handle, size); } EXPORT_SYMBOL(dma_free_noncoherent); @@ -67,7 +64,19 @@ void *dma_alloc_coherent(struct device *dev, size_t size, memset(kvaddr, 0, size); /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + *dma_handle = (dma_addr_t)paddr; + + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. + * Yeah this bit us - STAR 9000898266 + * + * Although core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + dma_cache_wback_inv((unsigned long)paddr, size); return kvaddr; } @@ -78,8 +87,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *kvaddr, { iounmap((void __force __iomem *)kvaddr); - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + free_pages_exact((void *)dma_handle, size); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 7f47d2a56f44..2c7ce8bb7475 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -113,6 +113,8 @@ static inline void __tlb_entry_erase(void) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#if (CONFIG_ARC_MMU_VER < 4) + static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { unsigned int idx; @@ -210,6 +212,28 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#else /* CONFIG_ARC_MMU_VER >= 4) */ + +static void utlb_invalidate(void) +{ + /* No need since uTLB is always in sync with JTLB */ +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); +} + +static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + write_aux_reg(ARC_REG_TLBPD1, pd1); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + /* * Un-conditionally (without lookup) erase the entire MMU contents */ @@ -582,23 +606,42 @@ void read_decode_mmu_bcr(void) #endif } *mmu3; + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } *mmu4; + tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); if (mmu->ver <= 2) { mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz = PAGE_SIZE; + mmu->pg_sz_k = TO_KB(PAGE_SIZE); mmu->sets = 1 << mmu2->sets; mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; mmu->u_itlb = mmu2->u_itlb; - } else { + } else if (mmu->ver == 3) { mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz = 512 << mmu3->pg_sz; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); mmu->sets = 1 << mmu3->sets; mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + } else { + mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + mmu->u_dtlb = mmu4->u_dtlb * 4; + mmu->u_itlb = mmu4->u_itlb * 4; } mmu->num_tlb = mmu->sets * mmu->ways; @@ -608,10 +651,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; + char super_pg[64] = ""; + + if (p_mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "%dM Super Page%s, ", + p_mmu->s_pg_sz_m, " (not used)"); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", - p_mmu->ver, TO_KB(p_mmu->pg_sz), + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); @@ -639,7 +687,7 @@ void arc_mmu_init(void) mmu->ver, CONFIG_ARC_MMU_VER); } - if (mmu->pg_sz != PAGE_SIZE) + if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); /* Enable the MMU */ diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index d572f1c2c724..f6f4c3cb505d 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -35,8 +35,6 @@ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 */ - .cpu A7 - #include <linux/linkage.h> #include <asm/entry.h> #include <asm/mmu.h> @@ -46,6 +44,7 @@ #include <asm/processor.h> #include <asm/tlb-mmu1.h> +#ifdef CONFIG_ISA_ARCOMPACT ;----------------------------------------------------------------- ; ARC700 Exception Handling doesn't auto-switch stack and it only provides ; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" @@ -123,6 +122,24 @@ ex_saved_reg1: #endif .endm +#else /* ARCv2 */ + +.macro TLBMISS_FREEUP_REGS + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 +.endm + +.macro TLBMISS_RESTORE_REGS + POP r3 + POP r2 + POP r1 + POP r0 +.endm + +#endif + ;============================================================================ ; Troubleshooting Stuff ;============================================================================ @@ -241,6 +258,7 @@ ex_saved_reg1: ; Commit the TLB entry into MMU .macro COMMIT_ENTRY_TO_MMU +#if (CONFIG_ARC_MMU_VER < 4) /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -251,6 +269,10 @@ ex_saved_reg1: #else sr TLBWrite, [ARC_REG_TLBCOMMAND] #endif + +#else + sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] +#endif .endm @@ -291,6 +313,7 @@ ENTRY(EV_TLBMissI) CONV_PTE_TO_TLB COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation rtie END(EV_TLBMissI) @@ -356,6 +379,7 @@ ENTRY(EV_TLBMissD) COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation rtie ;-------- Common routine to call Linux Page Fault Handler ----------- @@ -366,19 +390,5 @@ do_slow_path_pf: ; Slow path TLB Miss handled as a regular ARC Exception ; (stack switching / save the complete reg-file). - EXCEPTION_PROLOGUE - - ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r1, sp - lr r0, [efa] - - ; We don't want exceptions to be disabled while the fault is handled. - ; Now that we have saved the context we return from exception hence - ; exceptions get re-enable - - FAKE_RET_FROM_EXCPN r9 - - bl do_page_fault - b ret_from_exception - + b call_do_page_fault END(EV_TLBMissD) diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig deleted file mode 100644 index 217593a70751..000000000000 --- a/arch/arc/plat-arcfpga/Kconfig +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# - -menuconfig ARC_PLAT_FPGA_LEGACY - bool "\"Legacy\" ARC FPGA dev Boards" - select ARC_HAS_COH_CACHES if SMP - help - Support for ARC development boards, provided by Synopsys. - These are based on FPGA or ISS. e.g. - - ARCAngel4 - - ML509 - - MetaWare ISS - -if ARC_PLAT_FPGA_LEGACY - -config ISS_SMP_EXTN - bool "ARC SMP Extensions (ISS Models only)" - default n - depends on SMP - help - SMP Extensions to ARC700, in a "simulation only" Model, supported in - ARC ISS (Instruction Set Simulator). - The SMP extensions include: - -IDU (Interrupt Distribution Unit) - -XTL (To enable CPU start/stop/set-PC for another CPU) - It doesn't provide coherent Caches and/or Atomic Ops (LLOCK/SCOND) - -endif diff --git a/arch/arc/plat-arcfpga/include/plat/smp.h b/arch/arc/plat-arcfpga/include/plat/smp.h deleted file mode 100644 index c09eb4cfc77c..000000000000 --- a/arch/arc/plat-arcfpga/include/plat/smp.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Rajeshwar Ranga: Interrupt Distribution Unit API's - */ - -#ifndef __PLAT_ARCFPGA_SMP_H -#define __PLAT_ARCFPGA_SMP_H - -#ifdef CONFIG_SMP - -#include <linux/types.h> -#include <asm/arcregs.h> - -#define ARC_AUX_IDU_REG_CMD 0x2000 -#define ARC_AUX_IDU_REG_PARAM 0x2001 - -#define ARC_AUX_XTL_REG_CMD 0x2002 -#define ARC_AUX_XTL_REG_PARAM 0x2003 - -#define ARC_REG_MP_BCR 0x2021 - -#define ARC_XTL_CMD_WRITE_PC 0x04 -#define ARC_XTL_CMD_CLEAR_HALT 0x02 - -/* - * Build Configuration Register which identifies the sub-components - */ -struct bcr_mp { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int mp_arch:16, pad:5, sdu:1, idu:1, scu:1, ver:8; -#else - unsigned int ver:8, scu:1, idu:1, sdu:1, pad:5, mp_arch:16; -#endif -}; - -/* IDU supports 256 common interrupts */ -#define NR_IDU_IRQS 256 - -/* - * The Aux Regs layout is same bit-by-bit in both BE/LE modes. - * However when casted as a bitfield encoded "C" struct, gcc treats it as - * memory, generating different code for BE/LE, requiring strcture adj (see - * include/asm/arcregs.h) - * - * However when manually "carving" the value for a Aux, no special handling - * of BE is needed because of the property discribed above - */ -#define IDU_SET_COMMAND(irq, cmd) \ -do { \ - uint32_t __val; \ - __val = (((irq & 0xFF) << 8) | (cmd & 0xFF)); \ - write_aux_reg(ARC_AUX_IDU_REG_CMD, __val); \ -} while (0) - -#define IDU_SET_PARAM(par) write_aux_reg(ARC_AUX_IDU_REG_PARAM, par) -#define IDU_GET_PARAM() read_aux_reg(ARC_AUX_IDU_REG_PARAM) - -/* IDU Commands */ -#define IDU_DISABLE 0x00 -#define IDU_ENABLE 0x01 -#define IDU_IRQ_CLEAR 0x02 -#define IDU_IRQ_ASSERT 0x03 -#define IDU_IRQ_WMODE 0x04 -#define IDU_IRQ_STATUS 0x05 -#define IDU_IRQ_ACK 0x06 -#define IDU_IRQ_PEND 0x07 -#define IDU_IRQ_RMODE 0x08 -#define IDU_IRQ_WBITMASK 0x09 -#define IDU_IRQ_RBITMASK 0x0A - -#define idu_enable() IDU_SET_COMMAND(0, IDU_ENABLE) -#define idu_disable() IDU_SET_COMMAND(0, IDU_DISABLE) - -#define idu_irq_assert(irq) IDU_SET_COMMAND((irq), IDU_IRQ_ASSERT) -#define idu_irq_clear(irq) IDU_SET_COMMAND((irq), IDU_IRQ_CLEAR) - -/* IDU Interrupt Mode - Destination Encoding */ -#define IDU_IRQ_MOD_DISABLE 0x00 -#define IDU_IRQ_MOD_ROUND_RECP 0x01 -#define IDU_IRQ_MOD_TCPU_FIRSTRECP 0x02 -#define IDU_IRQ_MOD_TCPU_ALLRECP 0x03 - -/* IDU Interrupt Mode - Triggering Mode */ -#define IDU_IRQ_MODE_LEVEL_TRIG 0x00 -#define IDU_IRQ_MODE_PULSE_TRIG 0x01 - -#define IDU_IRQ_MODE_PARAM(dest_mode, trig_mode) \ - (((trig_mode & 0x01) << 15) | (dest_mode & 0xFF)) - -struct idu_irq_config { - uint8_t irq; - uint8_t dest_mode; - uint8_t trig_mode; -}; - -struct idu_irq_status { - uint8_t irq; - bool enabled; - bool status; - bool ack; - bool pend; - uint8_t next_rr; -}; - -extern void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask); -extern void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode); - -extern void iss_model_init_smp(unsigned int cpu); -extern void iss_model_init_early_smp(void); - -#endif /* CONFIG_SMP */ - -#endif diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c deleted file mode 100644 index 64797ba3bbe3..000000000000 --- a/arch/arc/plat-arcfpga/smp.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * ARC700 Simulation-only Extensions for SMP - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Vineet Gupta - 2012 : split off arch common and plat specific SMP - * Rajeshwar Ranga - 2007 : Interrupt Distribution Unit API's - */ - -#include <linux/smp.h> -#include <linux/irq.h> -#include <plat/smp.h> - -#define IDU_INTERRUPT_0 16 - -static char smp_cpuinfo_buf[128]; - -/* - *------------------------------------------------------------------- - * Platform specific callbacks expected by arch SMP code - *------------------------------------------------------------------- - */ - -/* - * Master kick starting another CPU - */ -static void iss_model_smp_wakeup_cpu(int cpu, unsigned long pc) -{ - /* setup the start PC */ - write_aux_reg(ARC_AUX_XTL_REG_PARAM, pc); - - /* Trigger WRITE_PC cmd for this cpu */ - write_aux_reg(ARC_AUX_XTL_REG_CMD, - (ARC_XTL_CMD_WRITE_PC | (cpu << 8))); - - /* Take the cpu out of Halt */ - write_aux_reg(ARC_AUX_XTL_REG_CMD, - (ARC_XTL_CMD_CLEAR_HALT | (cpu << 8))); - -} - -static inline int get_hw_config_num_irq(void) -{ - uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR); - - switch (val & 0x03) { - case 0: - return 16; - case 1: - return 32; - case 2: - return 8; - default: - return 0; - } - - return 0; -} - -/* - * Any SMP specific init any CPU does when it comes up. - * Here we setup the CPU to enable Inter-Processor-Interrupts - * Called for each CPU - * -Master : init_IRQ() - * -Other(s) : start_kernel_secondary() - */ -void iss_model_init_smp(unsigned int cpu) -{ - /* Check if CPU is configured for more than 16 interrupts */ - if (NR_IRQS <= 16 || get_hw_config_num_irq() <= 16) - panic("[arcfpga] IRQ system can't support IDU IPI\n"); - - idu_disable(); - - /**************************************************************** - * IDU provides a set of Common IRQs, each of which can be dynamically - * attached to (1|many|all) CPUs. - * The Common IRQs [0-15] are mapped as CPU pvt [16-31] - * - * Here we use a simple 1:1 mapping: - * A CPU 'x' is wired to Common IRQ 'x'. - * So an IDU ASSERT on IRQ 'x' will trigger Interupt on CPU 'x', which - * makes up for our simple IPI plumbing. - * - * TBD: Have a dedicated multicast IRQ for sending IPIs to all CPUs - * w/o having to do one-at-a-time - ******************************************************************/ - - /* - * Claim an IRQ which would trigger IPI on this CPU. - * In IDU parlance it involves setting up a cpu bitmask for the IRQ - * The bitmap here contains only 1 CPU (self). - */ - idu_irq_set_tgtcpu(cpu, 0x1 << cpu); - - /* Set the IRQ destination to use the bitmask above */ - idu_irq_set_mode(cpu, 7, /* XXX: IDU_IRQ_MOD_TCPU_ALLRECP: ISS bug */ - IDU_IRQ_MODE_PULSE_TRIG); - - idu_enable(); - - /* Attach the arch-common IPI ISR to our IDU IRQ */ - smp_ipi_irq_setup(cpu, IDU_INTERRUPT_0 + cpu); -} - -static void iss_model_ipi_send(int cpu) -{ - idu_irq_assert(cpu); -} - -static void iss_model_ipi_clear(int irq) -{ - idu_irq_clear(IDU_INTERRUPT_0 + smp_processor_id()); -} - -void iss_model_init_early_smp(void) -{ -#define IS_AVAIL1(var, str) ((var) ? str : "") - - struct bcr_mp mp; - - READ_BCR(ARC_REG_MP_BCR, mp); - - sprintf(smp_cpuinfo_buf, "Extn [ISS-SMP]: v%d, arch(%d) %s %s %s\n", - mp.ver, mp.mp_arch, IS_AVAIL1(mp.scu, "SCU"), - IS_AVAIL1(mp.idu, "IDU"), IS_AVAIL1(mp.sdu, "SDU")); - - plat_smp_ops.info = smp_cpuinfo_buf; - - plat_smp_ops.cpu_kick = iss_model_smp_wakeup_cpu; - plat_smp_ops.ipi_send = iss_model_ipi_send; - plat_smp_ops.ipi_clear = iss_model_ipi_clear; -} - -/* - *------------------------------------------------------------------- - * Low level Platform IPI Providers - *------------------------------------------------------------------- - */ - -/* Set the Mode for the Common IRQ */ -void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode) -{ - uint32_t par = IDU_IRQ_MODE_PARAM(dest_mode, trig_mode); - - IDU_SET_PARAM(par); - IDU_SET_COMMAND(irq, IDU_IRQ_WMODE); -} - -/* Set the target cpu Bitmask for Common IRQ */ -void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask) -{ - IDU_SET_PARAM(mask); - IDU_SET_COMMAND(irq, IDU_IRQ_WBITMASK); -} - -/* Get the Interrupt Acknowledged status for IRQ (as CPU Bitmask) */ -bool idu_irq_get_ack(uint8_t irq) -{ - uint32_t val; - - IDU_SET_COMMAND(irq, IDU_IRQ_ACK); - val = IDU_GET_PARAM(); - - return val & (1 << irq); -} - -/* - * Get the Interrupt Pending status for IRQ (as CPU Bitmask) - * -Pending means CPU has not yet noticed the IRQ (e.g. disabled) - * -After Interrupt has been taken, the IPI expcitily needs to be - * cleared, to be acknowledged. - */ -bool idu_irq_get_pend(uint8_t irq) -{ - uint32_t val; - - IDU_SET_COMMAND(irq, IDU_IRQ_PEND); - val = IDU_GET_PARAM(); - - return val & (1 << irq); -} diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig new file mode 100644 index 000000000000..d475f9d4847c --- /dev/null +++ b/arch/arc/plat-axs10x/Kconfig @@ -0,0 +1,46 @@ +# +# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +menuconfig ARC_PLAT_AXS10X + bool "Synopsys ARC AXS10x Software Development Platforms" + select DW_APB_ICTL + select GPIO_DWAPB + select OF_GPIO + select GENERIC_IRQ_CHIP + select ARCH_REQUIRE_GPIOLIB + help + Support for the ARC AXS10x Software Development Platforms. + + The AXS10x Platforms consist of a mainboard with peripherals, + on which several daughter cards can be placed. The daughter cards + typically contain a CPU and memory. + +if ARC_PLAT_AXS10X + +config AXS101 + depends on ISA_ARCOMPACT + bool "AXS101 with AXC001 CPU Card (ARC 770D/EM6/AS221)" + help + This adds support for the 770D/EM6/AS221 CPU Card. Only the ARC + 770D is supported in Linux. + + The AXS101 Platform consists of an AXS10x mainboard with + this daughtercard. Please use the axs101.dts device tree + with this configuration. + +config AXS103 + bool "AXS103 with AXC003 CPU Card (ARC HS38x)" + depends on ISA_ARCV2 + help + This adds support for the HS38x CPU Card. + + The AXS103 Platform consists of an AXS10x mainboard with + this daughtercard. Please use the axs103.dts device tree + with this configuration. + +endif diff --git a/arch/arc/plat-axs10x/Makefile b/arch/arc/plat-axs10x/Makefile new file mode 100644 index 000000000000..d4748f27f86e --- /dev/null +++ b/arch/arc/plat-axs10x/Makefile @@ -0,0 +1,9 @@ +# +# Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-$(CONFIG_ARC_PLAT_AXS10X) += axs10x.o diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c new file mode 100644 index 000000000000..99f7da513a48 --- /dev/null +++ b/arch/arc/plat-axs10x/axs10x.c @@ -0,0 +1,484 @@ +/* + * AXS101/AXS103 Software Development Platform + * + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/of_platform.h> + +#include <asm/asm-offsets.h> +#include <asm/clk.h> +#include <asm/io.h> +#include <asm/mach_desc.h> +#include <asm/mcip.h> + +#define AXS_MB_CGU 0xE0010000 +#define AXS_MB_CREG 0xE0011000 + +#define CREG_MB_IRQ_MUX (AXS_MB_CREG + 0x214) +#define CREG_MB_SW_RESET (AXS_MB_CREG + 0x220) +#define CREG_MB_VER (AXS_MB_CREG + 0x230) +#define CREG_MB_CONFIG (AXS_MB_CREG + 0x234) + +#define AXC001_CREG 0xF0001000 +#define AXC001_GPIO_INTC 0xF0003000 + +static void __init axs10x_enable_gpio_intc_wire(void) +{ + /* + * Peripherals on CPU Card and Mother Board are wired to cpu intc via + * intermediate DW APB GPIO blocks (mainly for debouncing) + * + * --------------------- + * | snps,arc700-intc | + * --------------------- + * | #7 | #15 + * ------------------- ------------------- + * | snps,dw-apb-gpio | | snps,dw-apb-gpio | + * ------------------- ------------------- + * | | + * | [ Debug UART on cpu card ] + * | + * ------------------------ + * | snps,dw-apb-intc (MB)| + * ------------------------ + * | | | | + * [eth] [uart] [... other perip on Main Board] + * + * Current implementation of "irq-dw-apb-ictl" driver doesn't work well + * with stacked INTCs. In particular problem happens if its master INTC + * not yet instantiated. See discussion here - + * https://lkml.org/lkml/2015/3/4/755 + * + * So setup the first gpio block as a passive pass thru and hide it from + * DT hardware topology - connect MB intc directly to cpu intc + * The GPIO "wire" needs to be init nevertheless (here) + * + * One side adv is that peripheral interrupt handling avoids one nested + * intc ISR hop + */ +#define GPIO_INTEN (AXC001_GPIO_INTC + 0x30) +#define GPIO_INTMASK (AXC001_GPIO_INTC + 0x34) +#define GPIO_INTTYPE_LEVEL (AXC001_GPIO_INTC + 0x38) +#define GPIO_INT_POLARITY (AXC001_GPIO_INTC + 0x3c) +#define MB_TO_GPIO_IRQ 12 + + iowrite32(~(1 << MB_TO_GPIO_IRQ), (void __iomem *) GPIO_INTMASK); + iowrite32(0, (void __iomem *) GPIO_INTTYPE_LEVEL); + iowrite32(~0, (void __iomem *) GPIO_INT_POLARITY); + iowrite32(1 << MB_TO_GPIO_IRQ, (void __iomem *) GPIO_INTEN); +} + +static inline void __init +write_cgu_reg(uint32_t value, void __iomem *reg, void __iomem *lock_reg) +{ + unsigned int loops = 128 * 1024, ctr; + + iowrite32(value, reg); + + ctr = loops; + while (((ioread32(lock_reg) & 1) == 1) && ctr--) /* wait for unlock */ + cpu_relax(); + + ctr = loops; + while (((ioread32(lock_reg) & 1) == 0) && ctr--) /* wait for re-lock */ + cpu_relax(); +} + +static void __init axs10x_print_board_ver(unsigned int creg, const char *str) +{ + union ver { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:11, y:12, m:4, d:5; +#else + unsigned int d:5, m:4, y:12, pad:11; +#endif + }; + unsigned int val; + } board; + + board.val = ioread32((void __iomem *)creg); + pr_info("AXS: %s FPGA Date: %u-%u-%u\n", str, board.d, board.m, + board.y); +} + +static void __init axs10x_early_init(void) +{ + int mb_rev; + char mb[32]; + + /* Determine motherboard version */ + if (ioread32((void __iomem *) CREG_MB_CONFIG) & (1 << 28)) + mb_rev = 3; /* HT-3 (rev3.0) */ + else + mb_rev = 2; /* HT-2 (rev2.0) */ + + axs10x_enable_gpio_intc_wire(); + + scnprintf(mb, 32, "MainBoard v%d", mb_rev); + axs10x_print_board_ver(CREG_MB_VER, mb); +} + +#ifdef CONFIG_AXS101 + +#define CREG_CPU_ADDR_770 (AXC001_CREG + 0x20) +#define CREG_CPU_ADDR_TUNN (AXC001_CREG + 0x60) +#define CREG_CPU_ADDR_770_UPD (AXC001_CREG + 0x34) +#define CREG_CPU_ADDR_TUNN_UPD (AXC001_CREG + 0x74) + +#define CREG_CPU_ARC770_IRQ_MUX (AXC001_CREG + 0x114) +#define CREG_CPU_GPIO_UART_MUX (AXC001_CREG + 0x120) + +/* + * Set up System Memory Map for ARC cpu / peripherals controllers + * + * Each AXI master has a 4GB memory map specified as 16 apertures of 256MB, each + * of which maps to a corresponding 256MB aperture in Target slave memory map. + * + * e.g. ARC cpu AXI Master's aperture 8 (0x8000_0000) is mapped to aperture 0 + * (0x0000_0000) of DDR Port 0 (slave #1) + * + * Access from cpu to MB controllers such as GMAC is setup using AXI Tunnel: + * which has master/slaves on both ends. + * e.g. aperture 14 (0xE000_0000) of ARC cpu is mapped to aperture 14 + * (0xE000_0000) of CPU Card AXI Tunnel slave (slave #3) which is mapped to + * MB AXI Tunnel Master, which also has a mem map setup + * + * In the reverse direction, MB AXI Masters (e.g. GMAC) mem map is setup + * to map to MB AXI Tunnel slave which connects to CPU Card AXI Tunnel Master + */ +struct aperture { + unsigned int slave_sel:4, slave_off:4, pad:24; +}; + +/* CPU Card target slaves */ +#define AXC001_SLV_NONE 0 +#define AXC001_SLV_DDR_PORT0 1 +#define AXC001_SLV_SRAM 2 +#define AXC001_SLV_AXI_TUNNEL 3 +#define AXC001_SLV_AXI2APB 6 +#define AXC001_SLV_DDR_PORT1 7 + +/* MB AXI Target slaves */ +#define AXS_MB_SLV_NONE 0 +#define AXS_MB_SLV_AXI_TUNNEL_CPU 1 +#define AXS_MB_SLV_AXI_TUNNEL_HAPS 2 +#define AXS_MB_SLV_SRAM 3 +#define AXS_MB_SLV_CONTROL 4 + +/* MB AXI masters */ +#define AXS_MB_MST_TUNNEL_CPU 0 +#define AXS_MB_MST_USB_OHCI 10 + +/* + * memmap for ARC core on CPU Card + */ +static const struct aperture axc001_memmap[16] = { + {AXC001_SLV_AXI_TUNNEL, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0x1}, + {AXC001_SLV_SRAM, 0x0}, /* 0x2000_0000: Local SRAM */ + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_DDR_PORT0, 0x0}, /* 0x8000_0000: DDR 0..256M */ + {AXC001_SLV_DDR_PORT0, 0x1}, /* 0x9000_0000: DDR 256..512M */ + {AXC001_SLV_DDR_PORT0, 0x2}, + {AXC001_SLV_DDR_PORT0, 0x3}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0xD}, + {AXC001_SLV_AXI_TUNNEL, 0xE}, /* MB: CREG, CGU... */ + {AXC001_SLV_AXI2APB, 0x0}, /* CPU Card local CREG, CGU... */ +}; + +/* + * memmap for CPU Card AXI Tunnel Master (for access by MB controllers) + * GMAC (MB) -> MB AXI Tunnel slave -> CPU Card AXI Tunnel Master -> DDR + */ +static const struct aperture axc001_axi_tunnel_memmap[16] = { + {AXC001_SLV_AXI_TUNNEL, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0x1}, + {AXC001_SLV_SRAM, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_DDR_PORT1, 0x0}, + {AXC001_SLV_DDR_PORT1, 0x1}, + {AXC001_SLV_DDR_PORT1, 0x2}, + {AXC001_SLV_DDR_PORT1, 0x3}, + {AXC001_SLV_NONE, 0x0}, + {AXC001_SLV_AXI_TUNNEL, 0xD}, + {AXC001_SLV_AXI_TUNNEL, 0xE}, + {AXC001_SLV_AXI2APB, 0x0}, +}; + +/* + * memmap for MB AXI Masters + * Same mem map for all perip controllers as well as MB AXI Tunnel Master + */ +static const struct aperture axs_mb_memmap[16] = { + {AXS_MB_SLV_SRAM, 0x0}, + {AXS_MB_SLV_SRAM, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x8}, /* DDR on CPU Card */ + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0x9}, /* DDR on CPU Card */ + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xA}, + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xB}, + {AXS_MB_SLV_NONE, 0x0}, + {AXS_MB_SLV_AXI_TUNNEL_HAPS, 0xD}, + {AXS_MB_SLV_CONTROL, 0x0}, /* MB Local CREG, CGU... */ + {AXS_MB_SLV_AXI_TUNNEL_CPU, 0xF}, +}; + +static noinline void __init +axs101_set_memmap(void __iomem *base, const struct aperture map[16]) +{ + unsigned int slave_select, slave_offset; + int i; + + slave_select = slave_offset = 0; + for (i = 0; i < 8; i++) { + slave_select |= map[i].slave_sel << (i << 2); + slave_offset |= map[i].slave_off << (i << 2); + } + + iowrite32(slave_select, base + 0x0); /* SLV0 */ + iowrite32(slave_offset, base + 0x8); /* OFFSET0 */ + + slave_select = slave_offset = 0; + for (i = 0; i < 8; i++) { + slave_select |= map[i+8].slave_sel << (i << 2); + slave_offset |= map[i+8].slave_off << (i << 2); + } + + iowrite32(slave_select, base + 0x4); /* SLV1 */ + iowrite32(slave_offset, base + 0xC); /* OFFSET1 */ +} + +static void __init axs101_early_init(void) +{ + int i; + + /* ARC 770D memory view */ + axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_770, axc001_memmap); + iowrite32(1, (void __iomem *) CREG_CPU_ADDR_770_UPD); + + /* AXI tunnel memory map (incoming traffic from MB into CPU Card */ + axs101_set_memmap((void __iomem *) CREG_CPU_ADDR_TUNN, + axc001_axi_tunnel_memmap); + iowrite32(1, (void __iomem *) CREG_CPU_ADDR_TUNN_UPD); + + /* MB peripherals memory map */ + for (i = AXS_MB_MST_TUNNEL_CPU; i <= AXS_MB_MST_USB_OHCI; i++) + axs101_set_memmap((void __iomem *) AXS_MB_CREG + (i << 4), + axs_mb_memmap); + + iowrite32(0x3ff, (void __iomem *) AXS_MB_CREG + 0x100); /* Update */ + + /* GPIO pins 18 and 19 are used as UART rx and tx, respectively. */ + iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX); + + /* Set up the MB interrupt system: mux interrupts to GPIO7) */ + iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX); + + /* reset ethernet and ULPI interfaces */ + iowrite32(0x18, (void __iomem *) CREG_MB_SW_RESET); + + /* map GPIO 14:10 to ARC 9:5 (IRQ mux change for MB v2 onwards) */ + iowrite32(0x52, (void __iomem *) CREG_CPU_ARC770_IRQ_MUX); + + axs10x_early_init(); +} + +#endif /* CONFIG_AXS101 */ + +#ifdef CONFIG_AXS103 + +#define AXC003_CGU 0xF0000000 +#define AXC003_CREG 0xF0001000 +#define AXC003_MST_AXI_TUNNEL 0 +#define AXC003_MST_HS38 1 + +#define CREG_CPU_AXI_M0_IRQ_MUX (AXC003_CREG + 0x440) +#define CREG_CPU_GPIO_UART_MUX (AXC003_CREG + 0x480) +#define CREG_CPU_TUN_IO_CTRL (AXC003_CREG + 0x494) + + +union pll_reg { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:17, noupd:1, bypass:1, edge:1, high:6, low:6; +#else + unsigned int low:6, high:6, edge:1, bypass:1, noupd:1, pad:17; +#endif + }; + unsigned int val; +}; + +static unsigned int __init axs103_get_freq(void) +{ + union pll_reg idiv, fbdiv, odiv; + unsigned int f = 33333333; + + idiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 0); + fbdiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 4); + odiv.val = ioread32((void __iomem *)AXC003_CGU + 0x80 + 8); + + if (idiv.bypass != 1) + f = f / (idiv.low + idiv.high); + + if (fbdiv.bypass != 1) + f = f * (fbdiv.low + fbdiv.high); + + if (odiv.bypass != 1) + f = f / (odiv.low + odiv.high); + + f = (f + 500000) / 1000000; /* Rounding */ + return f; +} + +static inline unsigned int __init encode_div(unsigned int id, int upd) +{ + union pll_reg div; + + div.val = 0; + + div.noupd = !upd; + div.bypass = id == 1 ? 1 : 0; + div.edge = (id%2 == 0) ? 0 : 1; /* 0 = rising */ + div.low = (id%2 == 0) ? id >> 1 : (id >> 1)+1; + div.high = id >> 1; + + return div.val; +} + +noinline static void __init +axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od) +{ + write_cgu_reg(encode_div(id, 0), + (void __iomem *)AXC003_CGU + 0x80 + 0, + (void __iomem *)AXC003_CGU + 0x110); + + write_cgu_reg(encode_div(fd, 0), + (void __iomem *)AXC003_CGU + 0x80 + 4, + (void __iomem *)AXC003_CGU + 0x110); + + write_cgu_reg(encode_div(od, 1), + (void __iomem *)AXC003_CGU + 0x80 + 8, + (void __iomem *)AXC003_CGU + 0x110); +} + +static void __init axs103_early_init(void) +{ + switch (arc_get_core_freq()/1000000) { + case 33: + axs103_set_freq(1, 1, 1); + break; + case 50: + axs103_set_freq(1, 30, 20); + break; + case 75: + axs103_set_freq(2, 45, 10); + break; + case 90: + axs103_set_freq(2, 54, 10); + break; + case 100: + axs103_set_freq(1, 30, 10); + break; + case 125: + axs103_set_freq(2, 45, 6); + break; + default: + /* + * In this case, core_frequency derived from + * DT "clock-frequency" might not match with board value. + * Hence update it to match the board value. + */ + arc_set_core_freq(axs103_get_freq() * 1000000); + break; + } + + pr_info("Freq is %dMHz\n", axs103_get_freq()); + + /* Memory maps already config in pre-bootloader */ + + /* set GPIO mux to UART */ + iowrite32(0x01, (void __iomem *) CREG_CPU_GPIO_UART_MUX); + + iowrite32((0x00100000U | 0x000C0000U | 0x00003322U), + (void __iomem *) CREG_CPU_TUN_IO_CTRL); + + /* Set up the AXS_MB interrupt system.*/ + iowrite32(12, (void __iomem *) (CREG_CPU_AXI_M0_IRQ_MUX + + (AXC003_MST_HS38 << 2))); + + /* connect ICTL - Main Board with GPIO line */ + iowrite32(0x01, (void __iomem *) CREG_MB_IRQ_MUX); + + axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card"); + + axs10x_early_init(); + +#ifdef CONFIG_ARC_MCIP + /* No Hardware init, but filling the smp ops callbacks */ + mcip_init_early_smp(); +#endif +} +#endif + +#ifdef CONFIG_AXS101 + +static const char *axs101_compat[] __initconst = { + "snps,axs101", + NULL, +}; + +MACHINE_START(AXS101, "axs101") + .dt_compat = axs101_compat, + .init_early = axs101_early_init, +MACHINE_END + +#endif /* CONFIG_AXS101 */ + +#ifdef CONFIG_AXS103 + +static const char *axs103_compat[] __initconst = { + "snps,axs103", + NULL, +}; + +MACHINE_START(AXS103, "axs103") + .dt_compat = axs103_compat, + .init_early = axs103_early_init, +#ifdef CONFIG_ARC_MCIP + .init_smp = mcip_init_smp, +#endif +MACHINE_END + +/* + * For the VDK OS-kit, to get the offset to pid and command fields + */ +char coware_swa_pid_offset[TASK_PID]; +char coware_swa_comm_offset[TASK_COMM]; + +#endif /* CONFIG_AXS103 */ diff --git a/arch/arc/plat-sim/Kconfig b/arch/arc/plat-sim/Kconfig new file mode 100644 index 000000000000..18e39fcc488a --- /dev/null +++ b/arch/arc/plat-sim/Kconfig @@ -0,0 +1,14 @@ +# +# Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +menuconfig ARC_PLAT_SIM + bool "ARC nSIM based simulation virtual platforms" + select ARC_HAS_COH_CACHES if SMP + help + Support for nSIM based ARC simulation platforms + This includes the standalone nSIM (uart only) vs. System C OSCI VP diff --git a/arch/arc/plat-arcfpga/Makefile b/arch/arc/plat-sim/Makefile index 66fd0ecd68b3..00b1a958cec7 100644 --- a/arch/arc/plat-arcfpga/Makefile +++ b/arch/arc/plat-sim/Makefile @@ -6,7 +6,4 @@ # published by the Free Software Foundation. # -KBUILD_CFLAGS += -Iarch/arc/plat-arcfpga/include - obj-y := platform.o -obj-$(CONFIG_ISS_SMP_EXTN) += smp.o diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-sim/platform.c index afc88254acc1..d9e35b4a2f08 100644 --- a/arch/arc/plat-arcfpga/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -1,5 +1,5 @@ /* - * ARC FPGA Platform support code + * ARC simulation Platform support code * * Copyright (C) 2012 Synopsys, Inc. (www.synopsys.com) * @@ -10,7 +10,7 @@ #include <linux/init.h> #include <asm/mach_desc.h> -#include <plat/smp.h> +#include <asm/mcip.h> /*----------------------- Machine Descriptions ------------------------------ * @@ -20,26 +20,18 @@ * callback set, by matching the DT compatible name. */ -static const char *legacy_fpga_compat[] __initconst = { - "snps,arc-angel4", - "snps,arc-ml509", - NULL, -}; - -MACHINE_START(LEGACY_FPGA, "legacy_fpga") - .dt_compat = legacy_fpga_compat, -#ifdef CONFIG_ISS_SMP_EXTN - .init_early = iss_model_init_early_smp, - .init_smp = iss_model_init_smp, -#endif -MACHINE_END - static const char *simulation_compat[] __initconst = { "snps,nsim", + "snps,nsim_hs", "snps,nsimosci", + "snps,nsimosci_hs", NULL, }; MACHINE_START(SIMULATION, "simulation") .dt_compat = simulation_compat, +#ifdef CONFIG_ARC_MCIP + .init_early = mcip_init_early_smp, + .init_smp = mcip_init_smp, +#endif MACHINE_END diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 7f0252c580e4..a718866ba52d 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -268,7 +268,6 @@ }; eth0: ethernet@70000 { - compatible = "marvell,armada-370-neta"; reg = <0x70000 0x4000>; interrupts = <8>; clocks = <&gateclk 4>; @@ -284,7 +283,6 @@ }; eth1: ethernet@74000 { - compatible = "marvell,armada-370-neta"; reg = <0x74000 0x4000>; interrupts = <10>; clocks = <&gateclk 3>; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 3f036bd635f4..53a1a5abe147 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -311,6 +311,14 @@ dmacap,memset; }; }; + + ethernet@70000 { + compatible = "marvell,armada-370-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-370-neta"; + }; }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi index 8479fdc9e9c2..c5fdc99f0dbe 100644 --- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi @@ -318,7 +318,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi index 661d54c81580..0e24f1a38540 100644 --- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi @@ -356,7 +356,7 @@ }; eth3: ethernet@34000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x34000 0x4000>; interrupts = <14>; clocks = <&gateclk 1>; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index e78ce4ab6b75..0854d4493da7 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -185,7 +185,7 @@ }; eth2: ethernet@30000 { - compatible = "marvell,armada-370-neta"; + compatible = "marvell,armada-xp-neta"; reg = <0x30000 0x4000>; interrupts = <12>; clocks = <&gateclk 2>; @@ -228,6 +228,14 @@ }; }; + ethernet@70000 { + compatible = "marvell,armada-xp-neta"; + }; + + ethernet@74000 { + compatible = "marvell,armada-xp-neta"; + }; + xor@f0900 { compatible = "marvell,orion-xor"; reg = <0xF0900 0x100 diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h index 1317ee40f4df..04ff8e7b37df 100644 --- a/arch/arm/include/asm/xen/hypervisor.h +++ b/arch/arm/include/asm/xen/hypervisor.h @@ -1,6 +1,8 @@ #ifndef _ASM_ARM_XEN_HYPERVISOR_H #define _ASM_ARM_XEN_HYPERVISOR_H +#include <linux/init.h> + extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; @@ -18,4 +20,10 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) extern struct dma_map_ops *xen_dma_ops; +#ifdef CONFIG_XEN +void __init xen_early_init(void); +#else +static inline void xen_early_init(void) { return; } +#endif + #endif /* _ASM_ARM_XEN_HYPERVISOR_H */ diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 0b579b2f4e0e..1bee8ca12494 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -12,7 +12,6 @@ #include <xen/interface/grant_table.h> #define phys_to_machine_mapping_valid(pfn) (1) -#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) #define pte_mfn pte_pfn #define mfn_pte pfn_pte diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e6d8c7658ffd..36c18b73c1f4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -46,6 +46,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/tlbflush.h> +#include <asm/xen/hypervisor.h> #include <asm/prom.h> #include <asm/mach/arch.h> @@ -972,6 +973,7 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_init(); + xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c index 9ecb8f9c4ef5..d4f7dc87042b 100644 --- a/arch/arm/mach-lpc32xx/irq.c +++ b/arch/arm/mach-lpc32xx/irq.c @@ -283,25 +283,25 @@ static int lpc32xx_set_irq_type(struct irq_data *d, unsigned int type) case IRQ_TYPE_EDGE_RISING: /* Rising edge sensitive */ __lpc32xx_set_irq_type(d->hwirq, 1, 1); - __irq_set_handler_locked(d->hwirq, handle_edge_irq); + __irq_set_handler_locked(d->irq, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: /* Falling edge sensitive */ __lpc32xx_set_irq_type(d->hwirq, 0, 1); - __irq_set_handler_locked(d->hwirq, handle_edge_irq); + __irq_set_handler_locked(d->irq, handle_edge_irq); break; case IRQ_TYPE_LEVEL_LOW: /* Low level sensitive */ __lpc32xx_set_irq_type(d->hwirq, 0, 0); - __irq_set_handler_locked(d->hwirq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); break; case IRQ_TYPE_LEVEL_HIGH: /* High level sensitive */ __lpc32xx_set_irq_type(d->hwirq, 1, 0); - __irq_set_handler_locked(d->hwirq, handle_level_irq); + __irq_set_handler_locked(d->irq, handle_level_irq); break; /* Other modes are not supported */ diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 7d0f07020c80..6c09cc440a2b 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -24,6 +24,7 @@ #include <linux/cpuidle.h> #include <linux/cpufreq.h> #include <linux/cpu.h> +#include <linux/console.h> #include <linux/mm.h> @@ -51,7 +52,9 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); int xen_platform_pci_unplug = XEN_UNPLUG_ALL; EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); -static __read_mostly int xen_events_irq = -1; +static __read_mostly unsigned int xen_events_irq; + +static __initdata struct device_node *xen_node; int xen_remap_domain_mfn_array(struct vm_area_struct *vma, unsigned long addr, @@ -150,40 +153,28 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) * documentation of the Xen Device Tree format. */ #define GRANT_TABLE_PHYSADDR 0 -static int __init xen_guest_init(void) +void __init xen_early_init(void) { - struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page = 0; - struct device_node *node; int len; const char *s = NULL; const char *version = NULL; const char *xen_prefix = "xen,xen-"; - struct resource res; - phys_addr_t grant_frames; - node = of_find_compatible_node(NULL, NULL, "xen,xen"); - if (!node) { + xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (!xen_node) { pr_debug("No Xen support\n"); - return 0; + return; } - s = of_get_property(node, "compatible", &len); + s = of_get_property(xen_node, "compatible", &len); if (strlen(xen_prefix) + 3 < len && !strncmp(xen_prefix, s, strlen(xen_prefix))) version = s + strlen(xen_prefix); if (version == NULL) { pr_debug("Xen version not found\n"); - return 0; + return; } - if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res)) - return 0; - grant_frames = res.start; - xen_events_irq = irq_of_parse_and_map(node, 0); - pr_info("Xen %s support found, events_irq=%d gnttab_frame=%pa\n", - version, xen_events_irq, &grant_frames); - if (xen_events_irq < 0) - return -ENODEV; + pr_info("Xen %s support found\n", version); xen_domain_type = XEN_HVM_DOMAIN; @@ -194,9 +185,34 @@ static int __init xen_guest_init(void) else xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED); - if (!shared_info_page) - shared_info_page = (struct shared_info *) - get_zeroed_page(GFP_KERNEL); + if (!console_set_on_cmdline && !xen_initial_domain()) + add_preferred_console("hvc", 0, NULL); +} + +static int __init xen_guest_init(void) +{ + struct xen_add_to_physmap xatp; + struct shared_info *shared_info_page = NULL; + struct resource res; + phys_addr_t grant_frames; + + if (!xen_domain()) + return 0; + + if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) { + pr_err("Xen grant table base address not found\n"); + return -ENODEV; + } + grant_frames = res.start; + + xen_events_irq = irq_of_parse_and_map(xen_node, 0); + if (!xen_events_irq) { + pr_err("Xen event channel interrupt not found\n"); + return -ENODEV; + } + + shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); + if (!shared_info_page) { pr_err("not enough memory\n"); return -ENOMEM; diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 498325074a06..03e75fef15b8 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -15,10 +15,10 @@ #include <xen/xen.h> #include <xen/interface/grant_table.h> #include <xen/interface/memory.h> +#include <xen/page.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -#include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/interface.h> diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index cb7a14c5cd69..887596c67b12 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -10,10 +10,10 @@ #include <xen/xen.h> #include <xen/interface/memory.h> +#include <xen/page.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -#include <asm/xen/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/interface.h> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index ffd3970721bf..f3067d4d4e35 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,7 @@ #include <asm/psci.h> #include <asm/efi.h> #include <asm/virt.h> +#include <asm/xen/hypervisor.h> unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -401,6 +402,7 @@ void __init setup_arch(char **cmdline_p) } else { psci_acpi_init(); } + xen_early_init(); cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index cfb298d66305..2d48b6a46166 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -231,8 +231,7 @@ static int __init eic_probe(struct platform_device *pdev) irq_set_chip_data(eic->first_irq + i, eic); } - irq_set_chained_handler(int_irq, demux_eic_irq); - irq_set_handler_data(int_irq, eic); + irq_set_chained_handler_and_data(int_irq, demux_eic_irq, eic); if (pdev->id == 0) { nmi_eic = eic; diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index 835fa04511c8..272dde481d17 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -148,14 +148,10 @@ static void psc_irq(unsigned int irq, struct irq_desc *desc) void __init psc_register_interrupts(void) { - irq_set_chained_handler(IRQ_AUTO_3, psc_irq); - irq_set_handler_data(IRQ_AUTO_3, (void *)0x30); - irq_set_chained_handler(IRQ_AUTO_4, psc_irq); - irq_set_handler_data(IRQ_AUTO_4, (void *)0x40); - irq_set_chained_handler(IRQ_AUTO_5, psc_irq); - irq_set_handler_data(IRQ_AUTO_5, (void *)0x50); - irq_set_chained_handler(IRQ_AUTO_6, psc_irq); - irq_set_handler_data(IRQ_AUTO_6, (void *)0x60); + irq_set_chained_handler_and_data(IRQ_AUTO_3, psc_irq, (void *)0x30); + irq_set_chained_handler_and_data(IRQ_AUTO_4, psc_irq, (void *)0x40); + irq_set_chained_handler_and_data(IRQ_AUTO_5, psc_irq, (void *)0x50); + irq_set_chained_handler_and_data(IRQ_AUTO_6, psc_irq, (void *)0x60); } void psc_irq_enable(int irq) { diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 8742e1cee492..ec9a371f1e62 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -161,8 +161,8 @@ void __init ar2315_arch_init_irq(void) irq = irq_create_mapping(domain, AR2315_MISC_IRQ_AHB); setup_irq(irq, &ar2315_ahb_err_interrupt); - irq_set_chained_handler(AR2315_IRQ_MISC, ar2315_misc_irq_handler); - irq_set_handler_data(AR2315_IRQ_MISC, domain); + irq_set_chained_handler_and_data(AR2315_IRQ_MISC, + ar2315_misc_irq_handler, domain); ar2315_misc_irq_domain = domain; } diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index 094b938fd603..e63e38fa4880 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -156,8 +156,8 @@ void __init ar5312_arch_init_irq(void) irq = irq_create_mapping(domain, AR5312_MISC_IRQ_AHB_PROC); setup_irq(irq, &ar5312_ahb_err_interrupt); - irq_set_chained_handler(AR5312_IRQ_MISC, ar5312_misc_irq_handler); - irq_set_handler_data(AR5312_IRQ_MISC, domain); + irq_set_chained_handler_and_data(AR5312_IRQ_MISC, + ar5312_misc_irq_handler, domain); ar5312_misc_irq_domain = domain; } diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index dadb30306a0a..f8d0acb4f973 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -384,8 +384,8 @@ static void ar2315_pci_irq_init(struct ar2315_pci_ctrl *apc) apc->irq_ext = irq_create_mapping(apc->domain, AR2315_PCI_IRQ_EXT); - irq_set_chained_handler(apc->irq, ar2315_pci_irq_handler); - irq_set_handler_data(apc->irq, apc); + irq_set_chained_handler_and_data(apc->irq, ar2315_pci_irq_handler, + apc); /* Clear any pending Abort or external Interrupts * and enable interrupt processing */ diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index da301e0a2f1f..53707aacc0f8 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -184,8 +184,7 @@ static int __init intc_of_init(struct device_node *node, rt_intc_w32(INTC_INT_GLOBAL, INTC_REG_ENABLE); - irq_set_chained_handler(irq, ralink_intc_irq_handler); - irq_set_handler_data(irq, domain); + irq_set_chained_handler_and_data(irq, ralink_intc_irq_handler, domain); /* tell the kernel which irq is used for performance monitoring */ rt_perfcount_irq = irq_create_mapping(domain, 9); diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c index 6ab3b73efcf8..480de70f4059 100644 --- a/arch/mn10300/kernel/irq.c +++ b/arch/mn10300/kernel/irq.c @@ -320,11 +320,11 @@ void migrate_irqs(void) if (irqd_is_per_cpu(data)) continue; - if (cpumask_test_cpu(self, &data->affinity) && + if (cpumask_test_cpu(self, data->affinity) && !cpumask_intersects(&irq_affinity[irq], cpu_online_mask)) { int cpu_id; cpu_id = cpumask_first(cpu_online_mask); - cpumask_set_cpu(cpu_id, &data->affinity); + cpumask_set_cpu(cpu_id, data->affinity); } /* We need to operate irq_affinity_online atomically. */ arch_local_cli_save(flags); @@ -335,7 +335,7 @@ void migrate_irqs(void) GxICR(irq) = x & GxICR_LEVEL; tmp = GxICR(irq); - new = cpumask_any_and(&data->affinity, + new = cpumask_any_and(data->affinity, cpu_online_mask); irq_affinity_online[irq] = new; diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 64707750c780..940cbddd9237 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -17,13 +17,15 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_PERF=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -44,6 +46,7 @@ CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y +CONFIG_LIVEPATCH=y CONFIG_MARCH_Z196=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=256 @@ -242,9 +245,9 @@ CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set CONFIG_NF_TABLES_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_NAT_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -264,8 +267,8 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NF_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -353,7 +356,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_XIP=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_VIRTIO_BLK=y @@ -458,7 +460,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y @@ -544,7 +545,6 @@ CONFIG_FRAME_WARN=1024 CONFIG_READABLE_ASM=y CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y @@ -558,6 +558,7 @@ CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_RB=y +CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_DEBUG_SHIRQ=y @@ -575,7 +576,6 @@ CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_DEBUG_CREDENTIALS=y -CONFIG_PROVE_RCU=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_NOTIFIER_ERROR_INJECTION=m @@ -611,7 +611,6 @@ CONFIG_TEST_BPF=m # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 5c3097272cd8..d793fec91797 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -17,11 +17,13 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_PERF=y CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -240,9 +242,9 @@ CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set CONFIG_NF_TABLES_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_NAT_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -262,8 +264,8 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NF_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -350,7 +352,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_XIP=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_VIRTIO_BLK=y @@ -455,7 +456,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y @@ -538,7 +538,7 @@ CONFIG_DEBUG_INFO=y CONFIG_FRAME_WARN=1024 CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_PANIC_ON_OOPS=y CONFIG_TIMER_STATS=y @@ -558,7 +558,6 @@ CONFIG_ATOMIC64_SELFTEST=y # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index bda70f1ffd2c..38a77e9c8aa6 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -17,11 +17,13 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_PERF=y CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -42,9 +44,10 @@ CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y +CONFIG_LIVEPATCH=y CONFIG_MARCH_Z196=y CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=256 +CONFIG_NR_CPUS=512 CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -238,9 +241,9 @@ CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set CONFIG_NF_TABLES_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_NAT_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -260,8 +263,8 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NF_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -348,7 +351,6 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_OSD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_XIP=y CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_VIRTIO_BLK=y @@ -453,7 +455,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y @@ -536,7 +537,7 @@ CONFIG_DEBUG_INFO=y CONFIG_FRAME_WARN=1024 CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m @@ -553,7 +554,6 @@ CONFIG_ATOMIC64_SELFTEST=y # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m -CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 83ef702d2403..9256b48e7e43 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -8,7 +8,6 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y @@ -31,9 +30,11 @@ CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_DEV_INTEGRITY=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y +CONFIG_LIVEPATCH=y CONFIG_MARCH_Z196=y CONFIG_NR_CPUS=256 CONFIG_HZ_100=y @@ -41,7 +42,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CMA=y CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -125,6 +125,7 @@ CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_ON_OOPS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_LOCKING=y @@ -135,12 +136,16 @@ CONFIG_DEBUG_LIST=y CONFIG_DEBUG_PI_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_PROVE_RCU=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RCU_CPU_STALL_INFO is not set CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y +CONFIG_TRACER_SNAPSHOT=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_UPROBE_EVENT=y CONFIG_KPROBES_SANITY_TEST=y # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y @@ -187,6 +192,7 @@ CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index f5a8e2fcde0c..91541000378e 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -8,8 +8,6 @@ #ifndef _ASM_S390_CPU_H #define _ASM_S390_CPU_H -#define MAX_CPU_ADDRESS 255 - #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index ece606c2ee86..39ae6a359747 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -94,7 +94,6 @@ struct dump_save_areas { }; extern struct dump_save_areas dump_save_areas; -struct save_area_ext *dump_save_area_create(int cpu); extern void do_reipl(void); extern void do_halt(void); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c891f41b2753..f6ff06077631 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -11,6 +11,7 @@ #include <asm/cpu.h> #define SCLP_CHP_INFO_MASK_SIZE 32 +#define SCLP_MAX_CORES 256 struct sclp_chp_info { u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; @@ -26,7 +27,7 @@ struct sclp_ipl_info { char loadparm[LOADPARM_LEN]; }; -struct sclp_cpu_entry { +struct sclp_core_entry { u8 core_id; u8 reserved0[2]; u8 : 3; @@ -38,12 +39,11 @@ struct sclp_cpu_entry { u8 reserved1; } __attribute__((packed)); -struct sclp_cpu_info { +struct sclp_core_info { unsigned int configured; unsigned int standby; unsigned int combined; - int has_cpu_type; - struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1]; + struct sclp_core_entry core[SCLP_MAX_CORES]; }; struct sclp_info { @@ -51,7 +51,7 @@ struct sclp_info { unsigned char has_vt220 : 1; unsigned char has_siif : 1; unsigned char has_sigpif : 1; - unsigned char has_cpu_type : 1; + unsigned char has_core_type : 1; unsigned char has_sprp : 1; unsigned int ibc; unsigned int mtid; @@ -60,15 +60,15 @@ struct sclp_info { unsigned long long rzm; unsigned long long rnmax; unsigned long long hamax; - unsigned int max_cpu; + unsigned int max_cores; unsigned long hsa_size; unsigned long long facilities; }; extern struct sclp_info sclp; -int sclp_get_cpu_info(struct sclp_cpu_info *info); -int sclp_cpu_configure(u8 cpu); -int sclp_cpu_deconfigure(u8 cpu); +int sclp_get_core_info(struct sclp_core_info *info); +int sclp_core_configure(u8 core); +int sclp_core_deconfigure(u8 core); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index b3bd0282dd98..5df26b11cf47 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -29,6 +29,7 @@ extern void smp_call_ipl_cpu(void (*func)(void *), void *); extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); +extern void smp_save_dump_cpus(void); extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); extern void smp_cpu_set_polarization(int cpu, int val); @@ -54,6 +55,7 @@ static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } static inline void smp_fill_possible_mask(void) { } +static inline void smp_save_dump_cpus(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index daed3fde42ec..326f717df587 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -78,15 +78,20 @@ s390_base_pgm_handler_fn: # # Calls diag 308 subcode 1 and continues execution # -# The following conditions must be ensured before calling this function: -# * Prefix register = 0 -# * Lowcore protection is disabled -# ENTRY(diag308_reset) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) + lg %r2,0(%r4) # Disable lowcore protection + nilh %r2,0xefff + larl %r4,.Lctlreg0 + stg %r2,0(%r4) + lctlg %c0,%c0,0(%r4) larl %r4,.Lfpctl # Floating point control register stfpc 0(%r4) + larl %r4,.Lprefix # Save prefix register + stpx 0(%r4) + larl %r4,.Lprefix_zero # Set prefix register to 0 + spx 0(%r4) larl %r4,.Lcontinue_psw # Save PSW flags epsw %r2,%r3 stm %r2,%r3,0(%r4) @@ -106,6 +111,8 @@ ENTRY(diag308_reset) lctlg %c0,%c15,0(%r4) larl %r4,.Lfpctl # Restore floating point ctl register lfpc 0(%r4) + larl %r4,.Lprefix # Restore prefix register + spx 0(%r4) larl %r4,.Lcontinue_psw # Restore PSW flags lpswe 0(%r4) .Lcontinue: @@ -122,10 +129,16 @@ ENTRY(diag308_reset) .section .bss .align 8 +.Lctlreg0: + .quad 0 .Lctlregs: .rept 16 .quad 0 .endr .Lfpctl: .long 0 +.Lprefix: + .long 0 +.Lprefix_zero: + .long 0 .previous diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 7a75ad4594e3..0c6c01eb3613 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -45,31 +45,6 @@ static struct memblock_type oldmem_type = { struct dump_save_areas dump_save_areas; /* - * Allocate and add a save area for a CPU - */ -struct save_area_ext *dump_save_area_create(int cpu) -{ - struct save_area_ext **save_areas, *save_area; - - save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); - if (!save_area) - return NULL; - if (cpu + 1 > dump_save_areas.count) { - dump_save_areas.count = cpu + 1; - save_areas = krealloc(dump_save_areas.areas, - dump_save_areas.count * sizeof(void *), - GFP_KERNEL | __GFP_ZERO); - if (!save_areas) { - kfree(save_area); - return NULL; - } - dump_save_areas.areas = save_areas; - } - dump_save_areas.areas[cpu] = save_area; - return save_area; -} - -/* * Return physical address for virtual address */ static inline void *load_real_addr(void *addr) @@ -416,7 +391,7 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) ptr += len; /* Copy lower halves of SIMD registers 0-15 */ for (i = 0; i < 16; i++) { - memcpy(ptr, &vx_regs[i], 8); + memcpy(ptr, &vx_regs[i].u[2], 8); ptr += 8; } return ptr; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index e6a1578fc000..afe05bfb7e00 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1572,7 +1572,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp) } #define param_check_sfb_size(name, p) __param_check(name, p, void) -static struct kernel_param_ops param_ops_sfb_size = { +static const struct kernel_param_ops param_ops_sfb_size = { .set = param_set_sfb_size, .get = param_get_sfb_size, }; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 73941bf42350..f7f027caaaaa 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -868,6 +868,11 @@ void __init setup_arch(char **cmdline_p) check_initrd(); reserve_crashkernel(); + /* + * Be aware that smp_save_dump_cpus() triggers a system reset. + * Therefore CPU and device initialization should be done afterwards. + */ + smp_save_dump_cpus(); setup_resources(); setup_vmcoreinfo(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0d9d59d4710e..6f54c175f5c9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -31,6 +31,7 @@ #include <linux/cpu.h> #include <linux/slab.h> #include <linux/crash_dump.h> +#include <linux/memblock.h> #include <asm/asm-offsets.h> #include <asm/switch_to.h> #include <asm/facility.h> @@ -69,7 +70,7 @@ struct pcpu { u16 address; /* physical cpu address */ }; -static u8 boot_cpu_type; +static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; unsigned int smp_cpu_mt_shift; @@ -531,15 +532,12 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); #ifdef CONFIG_CRASH_DUMP -static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu) +static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address, + int is_boot_cpu) { - void *lc = pcpu_devices[0].lowcore; - struct save_area_ext *sa_ext; + void *lc = (void *)(unsigned long) store_prefix(); unsigned long vx_sa; - sa_ext = dump_save_area_create(cpu); - if (!sa_ext) - panic("could not allocate memory for save area\n"); if (is_boot_cpu) { /* Copy the registers of the boot CPU. */ copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), @@ -554,14 +552,33 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu) if (!MACHINE_HAS_VX) return; /* Get the VX registers */ - vx_sa = __get_free_page(GFP_KERNEL); + vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!vx_sa) panic("could not allocate memory for VX save area\n"); __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); - free_page(vx_sa); + memblock_free(vx_sa, PAGE_SIZE); } +int smp_store_status(int cpu) +{ + unsigned long vx_sa; + struct pcpu *pcpu; + + pcpu = pcpu_devices + cpu; + if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, + 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + if (!MACHINE_HAS_VX) + return 0; + vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); + __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + vx_sa, NULL); + return 0; +} + +#endif /* CONFIG_CRASH_DUMP */ + /* * Collect CPU state of the previous, crashed system. * There are four cases: @@ -589,10 +606,12 @@ static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu) * old system. The ELF sections are picked up by the crash_dump code * via elfcorehdr_addr. */ -static void __init smp_store_cpu_states(struct sclp_cpu_info *info) +void __init smp_save_dump_cpus(void) { - unsigned int cpu, address, i, j; - int is_boot_cpu; +#ifdef CONFIG_CRASH_DUMP + int addr, cpu, boot_cpu_addr, max_cpu_addr; + struct save_area_ext *sa_ext; + bool is_boot_cpu; if (is_kdump_kernel()) /* Previous system stored the CPU states. Nothing to do. */ @@ -602,42 +621,36 @@ static void __init smp_store_cpu_states(struct sclp_cpu_info *info) return; /* Set multi-threading state to the previous system. */ pcpu_set_smt(sclp.mtid_prev); - /* Collect CPU states. */ - cpu = 0; - for (i = 0; i < info->configured; i++) { - /* Skip CPUs with different CPU type. */ - if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; + for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) continue; - for (j = 0; j <= smp_cpu_mtid; j++, cpu++) { - address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j; - is_boot_cpu = (address == pcpu_devices[0].address); - if (is_boot_cpu && !OLDMEM_BASE) - /* Skip boot CPU for standard zfcp dump. */ - continue; - /* Get state for this CPu. */ - __smp_store_cpu_state(cpu, address, is_boot_cpu); - } + cpu += 1; } -} - -int smp_store_status(int cpu) -{ - unsigned long vx_sa; - struct pcpu *pcpu; - - pcpu = pcpu_devices + cpu; - if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, - 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) - return -EIO; - if (!MACHINE_HAS_VX) - return 0; - vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); - __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, - vx_sa, NULL); - return 0; -} - + dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8); + dump_save_areas.count = cpu; + boot_cpu_addr = stap(); + for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == + SIGP_CC_NOT_OPERATIONAL) + continue; + sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8); + dump_save_areas.areas[cpu] = sa_ext; + if (!sa_ext) + panic("could not allocate memory for save area\n"); + is_boot_cpu = (addr == boot_cpu_addr); + cpu += 1; + if (is_boot_cpu && !OLDMEM_BASE) + /* Skip boot CPU for standard zfcp dump. */ + continue; + /* Get state for this CPU. */ + __smp_store_cpu_state(sa_ext, addr, is_boot_cpu); + } + diag308_reset(); + pcpu_set_smt(0); #endif /* CONFIG_CRASH_DUMP */ +} void smp_cpu_set_polarization(int cpu, int val) { @@ -649,21 +662,22 @@ int smp_cpu_get_polarization(int cpu) return pcpu_devices[cpu].polarization; } -static struct sclp_cpu_info *smp_get_cpu_info(void) +static struct sclp_core_info *smp_get_core_info(void) { static int use_sigp_detection; - struct sclp_cpu_info *info; + struct sclp_core_info *info; int address; info = kzalloc(sizeof(*info), GFP_KERNEL); - if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { + if (info && (use_sigp_detection || sclp_get_core_info(info))) { use_sigp_detection = 1; - for (address = 0; address <= MAX_CPU_ADDRESS; + for (address = 0; + address < (SCLP_MAX_CORES << smp_cpu_mt_shift); address += (1U << smp_cpu_mt_shift)) { if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == SIGP_CC_NOT_OPERATIONAL) continue; - info->cpu[info->configured].core_id = + info->core[info->configured].core_id = address >> smp_cpu_mt_shift; info->configured++; } @@ -674,7 +688,7 @@ static struct sclp_cpu_info *smp_get_cpu_info(void) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) +static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) { struct pcpu *pcpu; cpumask_t avail; @@ -685,9 +699,9 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); cpu = cpumask_first(&avail); for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + if (sclp.has_core_type && info->core[i].type != boot_core_type) continue; - address = info->cpu[i].core_id << smp_cpu_mt_shift; + address = info->core[i].core_id << smp_cpu_mt_shift; for (j = 0; j <= smp_cpu_mtid; j++) { if (pcpu_find_address(cpu_present_mask, address + j)) continue; @@ -713,41 +727,37 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) static void __init smp_detect_cpus(void) { unsigned int cpu, mtid, c_cpus, s_cpus; - struct sclp_cpu_info *info; + struct sclp_core_info *info; u16 address; /* Get CPU information */ - info = smp_get_cpu_info(); + info = smp_get_core_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); /* Find boot CPU type */ - if (info->has_cpu_type) { + if (sclp.has_core_type) { address = stap(); for (cpu = 0; cpu < info->combined; cpu++) - if (info->cpu[cpu].core_id == address) { + if (info->core[cpu].core_id == address) { /* The boot cpu dictates the cpu type. */ - boot_cpu_type = info->cpu[cpu].type; + boot_core_type = info->core[cpu].type; break; } if (cpu >= info->combined) panic("Could not find boot CPU type"); } -#ifdef CONFIG_CRASH_DUMP - /* Collect CPU state of previous system */ - smp_store_cpu_states(info); -#endif - /* Set multi-threading state for the current system */ - mtid = boot_cpu_type ? sclp.mtid : sclp.mtid_cp; + mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; pcpu_set_smt(mtid); /* Print number of CPUs */ c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { - if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) + if (sclp.has_core_type && + info->core[cpu].type != boot_core_type) continue; if (cpu < info->configured) c_cpus += smp_cpu_mtid + 1; @@ -884,7 +894,7 @@ void __init smp_fill_possible_mask(void) sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; sclp_max = min(smp_max_threads, sclp_max); - sclp_max = sclp.max_cpu * sclp_max ?: nr_cpu_ids; + sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp_max); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) @@ -977,7 +987,7 @@ static ssize_t cpu_configure_store(struct device *dev, case 0: if (pcpu->state != CPU_STATE_CONFIGURED) break; - rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift); + rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; for (i = 0; i <= smp_cpu_mtid; i++) { @@ -992,7 +1002,7 @@ static ssize_t cpu_configure_store(struct device *dev, case 1: if (pcpu->state != CPU_STATE_STANDBY) break; - rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift); + rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; for (i = 0; i <= smp_cpu_mtid; i++) { @@ -1107,10 +1117,10 @@ out: int __ref smp_rescan_cpus(void) { - struct sclp_cpu_info *info; + struct sclp_core_info *info; int nr; - info = smp_get_cpu_info(); + info = smp_get_core_info(); if (!info) return -ENOMEM; get_online_cpus(); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index d3766dd67e23..fee782acc2ee 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -250,7 +250,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) ({ \ /* Branch instruction needs 6 bytes */ \ int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ - _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \ + _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 59cf0b911898..9def1f52d03a 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -24,11 +24,14 @@ config TILE select MODULES_USE_ELF_RELA select HAVE_ARCH_TRACEHOOK select HAVE_SYSCALL_TRACEPOINTS + select USER_STACKTRACE_SUPPORT select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_DEBUG_STACKOVERFLOW select ARCH_WANT_FRAME_POINTERS select HAVE_CONTEXT_TRACKING select EDAC_SUPPORT + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT @@ -125,8 +128,10 @@ config HVC_TILE select HVC_IRQ if TILEGX def_bool y +# Building with ARCH=tilegx (or ARCH=tile) implies using the +# 64-bit TILE-Gx toolchain, so force CONFIG_TILEGX on. config TILEGX - bool "Building for TILE-Gx (64-bit) processor" + def_bool ARCH != "tilepro" select SPARSE_IRQ select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ select HAVE_FUNCTION_TRACER diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h index 1fe86911838b..84a924034bdb 100644 --- a/arch/tile/include/asm/irq.h +++ b/arch/tile/include/asm/irq.h @@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type); void setup_irq_regs(void); +#ifdef __tilegx__ +void arch_trigger_all_cpu_backtrace(bool self); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace +#endif + #endif /* _ASM_TILE_IRQ_H */ diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index dd4f9f17e30a..139dfdee0134 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -111,8 +111,6 @@ struct thread_struct { unsigned long long interrupt_mask; /* User interrupt-control 0 state */ unsigned long intctrl_0; - /* Is this task currently doing a backtrace? */ - bool in_backtrace; /* Any other miscellaneous processor state bits */ unsigned long proc_status; #if !CHIP_HAS_FIXED_INTVEC_BASE() diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h index c0a77b38d39a..b14b1ba5bf9c 100644 --- a/arch/tile/include/asm/spinlock_32.h +++ b/arch/tile/include/asm/spinlock_32.h @@ -41,8 +41,12 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) * to claim the lock is held, since it will be momentarily * if not already. There's no need to wait for a "valid" * lock->next_ticket to become available. + * Use READ_ONCE() to ensure that calling this in a loop is OK. */ - return lock->next_ticket != lock->current_ticket; + int curr = READ_ONCE(lock->current_ticket); + int next = READ_ONCE(lock->next_ticket); + + return next != curr; } void arch_spin_lock(arch_spinlock_t *lock); diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 9a12b9c7e5d3..b9718fb4e74a 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -18,6 +18,8 @@ #ifndef _ASM_TILE_SPINLOCK_64_H #define _ASM_TILE_SPINLOCK_64_H +#include <linux/compiler.h> + /* Shifts and masks for the various fields in "lock". */ #define __ARCH_SPIN_CURRENT_SHIFT 17 #define __ARCH_SPIN_NEXT_MASK 0x7fff @@ -44,7 +46,8 @@ static inline u32 arch_spin_next(u32 val) /* The lock is locked if a task would have to wait to get it. */ static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - u32 val = lock->lock; + /* Use READ_ONCE() to ensure that calling this in a loop is OK. */ + u32 val = READ_ONCE(lock->lock); return arch_spin_current(val) != arch_spin_next(val); } diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index 0e9d382a2d45..c3cb42615a9f 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -58,17 +58,14 @@ extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt); /* Advance to the next frame. */ extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt); +/* Dump just the contents of the pt_regs structure. */ +extern void tile_show_regs(struct pt_regs *); + /* * Dump stack given complete register info. Use only from the * architecture-specific code; show_stack() - * and dump_stack() (in entry.S) are architecture-independent entry points. + * and dump_stack() are architecture-independent entry points. */ -extern void tile_show_stack(struct KBacktraceIterator *, int headers); - -/* Dump stack of current process, with registers to seed the backtrace. */ -extern void dump_stack_regs(struct pt_regs *); - -/* Helper method for assembly dump_stack(). */ -extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52); +extern void tile_show_stack(struct KBacktraceIterator *); #endif /* _ASM_TILE_STACK_H */ diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index f804c39a5e4d..dc1fb28d9636 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -42,6 +42,7 @@ struct thread_info { unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */ void __user *unalign_jit_base; /* unalign fixup JIT base */ #endif + bool in_backtrace; /* currently doing backtrace? */ }; /* diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 4b99a1c3aab2..11c82270c1f5 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num); /* kernel/messaging.c */ void hv_message_intr(struct pt_regs *, int intnum); +#define TILE_NMI_DUMP_STACK 1 /* Dump stack for sysrq+'l' */ + +/* kernel/process.c */ +void do_nmi_dump_stack(struct pt_regs *regs); + +/* kernel/traps.c */ +void do_nmi(struct pt_regs *, int fault_num, unsigned long reason); + /* kernel/irq.c */ void tile_dev_intr(struct pt_regs *, int intnum); diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index a33276bf5ca1..0a9c4265763b 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -65,6 +65,13 @@ static inline int is_arch_mappable_range(unsigned long addr, #endif /* + * Note that using this definition ignores is_arch_mappable_range(), + * so on tilepro code that uses user_addr_max() is constrained not + * to reference the tilepro user-interrupt region. + */ +#define user_addr_max() (current_thread_info()->addr_limit.seg) + +/* * Test whether a block of memory is a valid user space address. * Returns 0 if the range is valid, nonzero otherwise. */ @@ -471,62 +478,9 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) #endif -/** - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -extern long strnlen_user_asm(const char __user *str, long n); -static inline long __must_check strnlen_user(const char __user *str, long n) -{ - might_fault(); - return strnlen_user_asm(str, n); -} -#define strlen_user(str) strnlen_user(str, LONG_MAX) - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -extern long strncpy_from_user_asm(char *dst, const char __user *src, long); -static inline long __must_check __strncpy_from_user( - char *dst, const char __user *src, long count) -{ - might_fault(); - return strncpy_from_user_asm(dst, src, count); -} -static inline long __must_check strncpy_from_user( - char *dst, const char __user *src, long count) -{ - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return -EFAULT; -} +extern long strnlen_user(const char __user *str, long n); +extern long strlen_user(const char __user *str); +extern long strncpy_from_user(char *dst, const char __user *src, long); /** * clear_user: - Zero a block of memory in user space. diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..9e5ce0d7b292 --- /dev/null +++ b/arch/tile/include/asm/word-at-a-time.h @@ -0,0 +1,36 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +#include <asm/byteorder.h> + +struct word_at_a_time { /* unused */ }; +#define WORD_AT_A_TIME_CONSTANTS {} + +/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */ +static inline unsigned long has_zero(unsigned long val, unsigned long *data, + const struct word_at_a_time *c) +{ +#ifdef __tilegx__ + unsigned long mask = __insn_v1cmpeqi(val, 0); +#else /* tilepro */ + unsigned long mask = __insn_seqib(val, 0); +#endif + *data = mask; + return mask; +} + +/* These operations are both nops. */ +#define prep_zero_mask(val, data, c) (data) +#define create_zero_mask(data) (data) + +/* And this operation just depends on endianness. */ +static inline long find_zero(unsigned long mask) +{ +#ifdef __BIG_ENDIAN + return __builtin_clzl(mask) >> 3; +#else + return __builtin_ctzl(mask) >> 3; +#endif +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index e0e6af4e783b..f10b332b3b65 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h @@ -321,8 +321,11 @@ /** hv_console_set_ipi */ #define HV_DISPATCH_CONSOLE_SET_IPI 63 +/** hv_send_nmi */ +#define HV_DISPATCH_SEND_NMI 65 + /** One more than the largest dispatch value */ -#define _HV_DISPATCH_END 64 +#define _HV_DISPATCH_END 66 #ifndef __ASSEMBLER__ @@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void); #define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL /** Device interrupt downcall interrupt vector */ #define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS +/** NMI downcall interrupt vector */ +#define INT_NMI_DWNCL 64 + +#define HV_NMI_FLAG_FORCE 0x1 /**< Force an NMI downcall regardless of + the ICS bit of the client. */ #ifndef __ASSEMBLER__ @@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg); int hv_dev_poll_cancel(int devhdl); +/** NMI information */ +typedef struct +{ + /** Result: negative error, or HV_NMI_RESULT_xxx. */ + int result; + + /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */ + HV_VirtAddr pc; + +} HV_NMI_Info; + +/** NMI issued successfully. */ +#define HV_NMI_RESULT_OK 0 + +/** NMI not issued: remote tile running at client PL with ICS set. */ +#define HV_NMI_RESULT_FAIL_ICS 1 + +/** NMI not issued: remote tile waiting in hypervisor. */ +#define HV_NMI_RESULT_FAIL_HV 2 + +/** Force an NMI downcall regardless of the ICS bit of the client. */ +#define HV_NMI_FLAG_FORCE 0x1 + +/** Send an NMI interrupt request to a particular tile. + * + * This will cause the NMI to be issued on the remote tile regardless + * of the state of the client interrupt mask. However, if the remote + * tile is in the hypervisor, it will not execute the NMI, and + * HV_NMI_RESULT_FAIL_HV will be returned. Similarly, if the remote + * tile is in a client interrupt critical section at the time of the + * NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will + * be returned. In this second case, however, if HV_NMI_FLAG_FORCE + * is set in flags, then the remote tile will enter its NMI interrupt + * vector regardless. Forcing the NMI vector during an interrupt + * critical section will mean that the client can not safely continue + * execution after handling the interrupt. + * + * @param tile Tile to which the NMI request is sent. + * @param info NMI information which is defined by and interpreted by the + * supervisor, is passed to the specified tile, and is + * stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the + * specified tile when entering the NMI handler routine. + * Typically, this parameter stores the NMI type, or an aligned + * VA plus some special bits, etc. + * @param flags Flags (HV_NMI_FLAG_xxx). + * @return Information about the requested NMI. + */ +HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags); + + /** Scatter-gather list for preada/pwritea calls. */ typedef struct #if CHIP_VA_WIDTH() <= 32 diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 3d9175992a20..670a3569450f 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -27,13 +27,6 @@ STD_ENTRY(current_text_addr) { move r0, lr; jrp lr } STD_ENDPROC(current_text_addr) -STD_ENTRY(dump_stack) - { move r2, lr; lnk r1 } - { move r4, r52; addli r1, r1, dump_stack - . } - { move r3, sp; j _dump_stack } - jrp lr /* keep backtracer happy */ - STD_ENDPROC(dump_stack) - STD_ENTRY(KBacktraceIterator_init_current) { move r2, lr; lnk r1 } { move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . } diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S index 2ab456622391..d78ee2ad610c 100644 --- a/arch/tile/kernel/hvglue.S +++ b/arch/tile/kernel/hvglue.S @@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32 gensym hv_get_ipi_pte, 0x700, 32 gensym hv_set_pte_super_shift, 0x720, 32 gensym hv_console_set_ipi, 0x7e0, 32 -gensym hv_glue_internals, 0x800, 30720 +gensym hv_send_nmi, 0x820, 32 +gensym hv_glue_internals, 0x820, 30688 diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c index 85c74ad29312..add0d71395c6 100644 --- a/arch/tile/kernel/hvglue_trace.c +++ b/arch/tile/kernel/hvglue_trace.c @@ -75,6 +75,7 @@ #define hv_get_ipi_pte _hv_get_ipi_pte #define hv_set_pte_super_shift _hv_set_pte_super_shift #define hv_console_set_ipi _hv_console_set_ipi +#define hv_send_nmi _hv_send_nmi #include <hv/hypervisor.h> #undef hv_init #undef hv_install_context @@ -134,6 +135,7 @@ #undef hv_get_ipi_pte #undef hv_set_pte_super_shift #undef hv_console_set_ipi +#undef hv_send_nmi /* * Provide macros based on <linux/syscalls.h> to provide a wrapper @@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa, HV_VirtAddr, tlb_va, unsigned long, tlb_length, unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask, HV_Remote_ASID*, asids, int, asidcount) +HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info, + __hv64, flags) diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 5b67efcecabd..800b91d3f9dc 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -515,6 +515,10 @@ intvec_\vecname: .ifc \c_routine, handle_perf_interrupt mfspr r2, AUX_PERF_COUNT_STS .endif + .ifc \c_routine, do_nmi + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* nmi type */ + .else + .endif .endif .endif .endif @@ -1571,3 +1575,5 @@ intrpt_start: /* Synthetic interrupt delivered only by the simulator */ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint + /* Synthetic interrupt delivered by hv */ + int_hand INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index b403c2e3e263..a45213781ad0 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -27,6 +27,7 @@ #include <linux/kernel.h> #include <linux/tracehook.h> #include <linux/signal.h> +#include <linux/delay.h> #include <linux/context_tracking.h> #include <asm/stack.h> #include <asm/switch_to.h> @@ -132,7 +133,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long)); callee_regs[0] = sp; /* r30 = function */ callee_regs[1] = arg; /* r31 = arg */ - childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0); p->thread.pc = (unsigned long) ret_from_kernel_thread; return 0; } @@ -546,31 +546,141 @@ void exit_thread(void) #endif } -void show_regs(struct pt_regs *regs) +void tile_show_regs(struct pt_regs *regs) { - struct task_struct *tsk = validate_current(); int i; - - if (tsk != &corrupt_current) - show_regs_print_info(KERN_ERR); #ifdef __tilegx__ for (i = 0; i < 17; i++) - pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n", + pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", i, regs->regs[i], i+18, regs->regs[i+18], i+36, regs->regs[i+36]); - pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n", + pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n", regs->regs[17], regs->regs[35], regs->tp); - pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr); + pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); #else for (i = 0; i < 13; i++) - pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n", + pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT + " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", i, regs->regs[i], i+14, regs->regs[i+14], i+27, regs->regs[i+27], i+40, regs->regs[i+40]); - pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n", + pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", regs->regs[13], regs->tp, regs->sp, regs->lr); #endif - pr_err(" pc : " REGFMT " ex1: %ld faultnum: %ld\n", - regs->pc, regs->ex1, regs->faultnum); + pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld flags:%s%s%s%s\n", + regs->pc, regs->ex1, regs->faultnum, + is_compat_task() ? " compat" : "", + (regs->flags & PT_FLAGS_DISABLE_IRQ) ? " noirq" : "", + !(regs->flags & PT_FLAGS_CALLER_SAVES) ? " nocallersave" : "", + (regs->flags & PT_FLAGS_RESTORE_REGS) ? " restoreregs" : ""); +} + +void show_regs(struct pt_regs *regs) +{ + struct KBacktraceIterator kbt; + + show_regs_print_info(KERN_DEFAULT); + tile_show_regs(regs); + + KBacktraceIterator_init(&kbt, NULL, regs); + tile_show_stack(&kbt); +} + +/* To ensure stack dump on tiles occurs one by one. */ +static DEFINE_SPINLOCK(backtrace_lock); +/* To ensure no backtrace occurs before all of the stack dump are done. */ +static atomic_t backtrace_cpus; +/* The cpu mask to avoid reentrance. */ +static struct cpumask backtrace_mask; - dump_stack_regs(regs); +void do_nmi_dump_stack(struct pt_regs *regs) +{ + int is_idle = is_idle_task(current) && !in_interrupt(); + int cpu; + + nmi_enter(); + cpu = smp_processor_id(); + if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask))) + goto done; + + spin_lock(&backtrace_lock); + if (is_idle) + pr_info("CPU: %d idle\n", cpu); + else + show_regs(regs); + spin_unlock(&backtrace_lock); + atomic_dec(&backtrace_cpus); +done: + nmi_exit(); +} + +#ifdef __tilegx__ +void arch_trigger_all_cpu_backtrace(bool self) +{ + struct cpumask mask; + HV_Coord tile; + unsigned int timeout; + int cpu; + int ongoing; + HV_NMI_Info info[NR_CPUS]; + + ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1); + if (ongoing != 0) { + pr_err("Trying to do all-cpu backtrace.\n"); + pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n", + ongoing); + if (self) { + pr_err("Reporting the stack on this cpu only.\n"); + dump_stack(); + } + return; + } + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + cpumask_copy(&backtrace_mask, &mask); + + /* Backtrace for myself first. */ + if (self) + dump_stack(); + + /* Tentatively dump stack on remote tiles via NMI. */ + timeout = 100; + while (!cpumask_empty(&mask) && timeout) { + for_each_cpu(cpu, &mask) { + tile.x = cpu_x(cpu); + tile.y = cpu_y(cpu); + info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0); + if (info[cpu].result == HV_NMI_RESULT_OK) + cpumask_clear_cpu(cpu, &mask); + } + + mdelay(10); + timeout--; + } + + /* Warn about cpus stuck in ICS and decrement their counts here. */ + if (!cpumask_empty(&mask)) { + for_each_cpu(cpu, &mask) { + switch (info[cpu].result) { + case HV_NMI_RESULT_FAIL_ICS: + pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n", + cpu, info[cpu].pc); + break; + case HV_NMI_RESULT_FAIL_HV: + pr_warn("Skipping stack dump of cpu %d in hypervisor\n", + cpu); + break; + case HV_ENOSYS: + pr_warn("Hypervisor too old to allow remote stack dumps.\n"); + goto skip_for_each; + default: /* should not happen */ + pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n", + cpu, info[cpu].result, info[cpu].pc); + break; + } + } +skip_for_each: + atomic_sub(cpumask_weight(&mask), &backtrace_cpus); + } } +#endif /* __tilegx_ */ diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index d366675e4bf8..99c9ff87e018 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -71,7 +71,7 @@ static unsigned long __initdata node_percpu[MAX_NUMNODES]; * per-CPU stack and boot info. */ DEFINE_PER_CPU(unsigned long, boot_sp) = - (unsigned long)init_stack + THREAD_SIZE; + (unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA; #ifdef CONFIG_SMP DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index c42dce50acd8..35d34635e4f1 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -23,6 +23,7 @@ #include <linux/mmzone.h> #include <linux/dcache.h> #include <linux/fs.h> +#include <linux/hardirq.h> #include <linux/string.h> #include <asm/backtrace.h> #include <asm/page.h> @@ -109,7 +110,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) if (kbt->verbose) pr_err(" <%s while in user mode>\n", fault); } else { - if (kbt->verbose) + if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0)) pr_err(" (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n", p->pc, p->sp, p->ex1); return NULL; @@ -119,10 +120,12 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) return p; } -/* Is the pc pointing to a sigreturn trampoline? */ -static int is_sigreturn(unsigned long pc) +/* Is the iterator pointing to a sigreturn trampoline? */ +static int is_sigreturn(struct KBacktraceIterator *kbt) { - return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn)); + return kbt->task->mm && + (kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base + + (ulong)&__vdso_rt_sigreturn)); } /* Return a pt_regs pointer for a valid signal handler frame */ @@ -131,7 +134,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, { BacktraceIterator *b = &kbt->it; - if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET && + if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET && b->sp % sizeof(long) == 0) { int retval; pagefault_disable(); @@ -151,11 +154,6 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, return NULL; } -static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) -{ - return is_sigreturn(kbt->it.pc); -} - static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) { struct pt_regs *p; @@ -178,7 +176,7 @@ static int KBacktraceIterator_next_item_inclusive( { for (;;) { do { - if (!KBacktraceIterator_is_sigreturn(kbt)) + if (!is_sigreturn(kbt)) return KBT_ONGOING; } while (backtrace_next(&kbt->it)); @@ -357,51 +355,50 @@ static void describe_addr(struct KBacktraceIterator *kbt, */ static bool start_backtrace(void) { - if (current->thread.in_backtrace) { + if (current_thread_info()->in_backtrace) { pr_err("Backtrace requested while in backtrace!\n"); return false; } - current->thread.in_backtrace = true; + current_thread_info()->in_backtrace = true; return true; } static void end_backtrace(void) { - current->thread.in_backtrace = false; + current_thread_info()->in_backtrace = false; } /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() - * and dump_stack() (in entry.S) are architecture-independent entry points. + * and dump_stack() are architecture-independent entry points. */ -void tile_show_stack(struct KBacktraceIterator *kbt, int headers) +void tile_show_stack(struct KBacktraceIterator *kbt) { int i; int have_mmap_sem = 0; if (!start_backtrace()) return; - if (headers) { - /* - * Add a blank line since if we are called from panic(), - * then bust_spinlocks() spit out a space in front of us - * and it will mess up our KERN_ERR. - */ - pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n", - kbt->task->pid, kbt->task->tgid, kbt->task->comm, - raw_smp_processor_id(), get_cycles()); - } kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { char namebuf[KSYM_NAME_LEN+100]; unsigned long address = kbt->it.pc; - /* Try to acquire the mmap_sem as we pass into userspace. */ - if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + /* + * Try to acquire the mmap_sem as we pass into userspace. + * If we're in an interrupt context, don't even try, since + * it's not safe to call e.g. d_path() from an interrupt, + * since it uses spin locks without disabling interrupts. + * Note we test "kbt->task == current", not "kbt->is_current", + * since we're checking that "current" will work in d_path(). + */ + if (kbt->task == current && address < PAGE_OFFSET && + !have_mmap_sem && kbt->task->mm && !in_interrupt()) { have_mmap_sem = down_read_trylock(&kbt->task->mm->mmap_sem); + } describe_addr(kbt, address, have_mmap_sem, namebuf, sizeof(namebuf)); @@ -416,24 +413,12 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) } if (kbt->end == KBT_LOOP) pr_err("Stack dump stopped; next frame identical to this one\n"); - if (headers) - pr_err("Stack dump complete\n"); if (have_mmap_sem) up_read(&kbt->task->mm->mmap_sem); end_backtrace(); } EXPORT_SYMBOL(tile_show_stack); - -/* This is called from show_regs() and _dump_stack() */ -void dump_stack_regs(struct pt_regs *regs) -{ - struct KBacktraceIterator kbt; - KBacktraceIterator_init(&kbt, NULL, regs); - tile_show_stack(&kbt, 1); -} -EXPORT_SYMBOL(dump_stack_regs); - static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs, ulong pc, ulong lr, ulong sp, ulong r52) { @@ -445,11 +430,15 @@ static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs, return regs; } -/* This is called from dump_stack() and just converts to pt_regs */ +/* Deprecated function currently only used by kernel_double_fault(). */ void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) { + struct KBacktraceIterator kbt; struct pt_regs regs; - dump_stack_regs(regs_to_pt_regs(®s, pc, lr, sp, r52)); + + regs_to_pt_regs(®s, pc, lr, sp, r52); + KBacktraceIterator_init(&kbt, NULL, ®s); + tile_show_stack(&kbt); } /* This is called from KBacktraceIterator_init_current() */ @@ -461,22 +450,30 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc, regs_to_pt_regs(®s, pc, lr, sp, r52)); } -/* This is called only from kernel/sched/core.c, with esp == NULL */ +/* + * Called from sched_show_task() with task != NULL, or dump_stack() + * with task == NULL. The esp argument is always NULL. + */ void show_stack(struct task_struct *task, unsigned long *esp) { struct KBacktraceIterator kbt; - if (task == NULL || task == current) + if (task == NULL || task == current) { KBacktraceIterator_init_current(&kbt); - else + KBacktraceIterator_next(&kbt); /* don't show first frame */ + } else { KBacktraceIterator_init(&kbt, task, NULL); - tile_show_stack(&kbt, 0); + } + tile_show_stack(&kbt); } #ifdef CONFIG_STACKTRACE /* Support generic Linux stack API too */ -void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) +static void save_stack_trace_common(struct task_struct *task, + struct pt_regs *regs, + bool user, + struct stack_trace *trace) { struct KBacktraceIterator kbt; int skip = trace->skip; @@ -484,31 +481,57 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) if (!start_backtrace()) goto done; - if (task == NULL || task == current) + if (regs != NULL) { + KBacktraceIterator_init(&kbt, NULL, regs); + } else if (task == NULL || task == current) { KBacktraceIterator_init_current(&kbt); - else + skip++; /* don't show KBacktraceIterator_init_current */ + } else { KBacktraceIterator_init(&kbt, task, NULL); + } for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) { if (skip) { --skip; continue; } - if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET) + if (i >= trace->max_entries || + (!user && kbt.it.pc < PAGE_OFFSET)) break; trace->entries[i++] = kbt.it.pc; } end_backtrace(); done: + if (i < trace->max_entries) + trace->entries[i++] = ULONG_MAX; trace->nr_entries = i; } + +void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) +{ + save_stack_trace_common(task, NULL, false, trace); +} EXPORT_SYMBOL(save_stack_trace_tsk); void save_stack_trace(struct stack_trace *trace) { - save_stack_trace_tsk(NULL, trace); + save_stack_trace_common(NULL, NULL, false, trace); } EXPORT_SYMBOL_GPL(save_stack_trace); +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + save_stack_trace_common(NULL, regs, false, trace); +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* Trace user stack if we are not a kernel thread. */ + if (current->mm) + save_stack_trace_common(NULL, task_pt_regs(current), + true, trace); + else if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} #endif /* In entry.S */ diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 312fc134c1cb..0011a9ff0525 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -395,6 +395,21 @@ done: exception_exit(prev_state); } +void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason) +{ + switch (reason) { + case TILE_NMI_DUMP_STACK: + do_nmi_dump_stack(regs); + break; + default: + panic("Unexpected do_nmi type %ld", reason); + return; + } +} + +/* Deprecated function currently only used here. */ +extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52); + void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52) { _dump_stack(dummy, pc, lr, sp, r52); diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c index 8bb21eda07d8..e63310c49742 100644 --- a/arch/tile/kernel/vdso/vgettimeofday.c +++ b/arch/tile/kernel/vdso/vgettimeofday.c @@ -67,7 +67,7 @@ static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts) u64 ns; do { - count = read_seqcount_begin(&vdso->tb_seq); + count = raw_read_seqcount_begin(&vdso->tb_seq); ts->tv_sec = vdso->wall_time_sec; ns = vdso->wall_time_snsec; ns += vgetsns(vdso); @@ -86,7 +86,7 @@ static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts) u64 ns; do { - count = read_seqcount_begin(&vdso->tb_seq); + count = raw_read_seqcount_begin(&vdso->tb_seq); ts->tv_sec = vdso->monotonic_time_sec; ns = vdso->monotonic_time_snsec; ns += vgetsns(vdso); @@ -105,7 +105,7 @@ static inline int do_realtime_coarse(struct vdso_data *vdso, unsigned count; do { - count = read_seqcount_begin(&vdso->tb_seq); + count = raw_read_seqcount_begin(&vdso->tb_seq); ts->tv_sec = vdso->wall_time_coarse_sec; ts->tv_nsec = vdso->wall_time_coarse_nsec; } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count))); @@ -119,7 +119,7 @@ static inline int do_monotonic_coarse(struct vdso_data *vdso, unsigned count; do { - count = read_seqcount_begin(&vdso->tb_seq); + count = raw_read_seqcount_begin(&vdso->tb_seq); ts->tv_sec = vdso->monotonic_time_coarse_sec; ts->tv_nsec = vdso->monotonic_time_coarse_nsec; } while (unlikely(read_seqcount_retry(&vdso->tb_seq, count))); @@ -137,7 +137,7 @@ struct syscall_return_value __vdso_gettimeofday(struct timeval *tv, /* The use of the timezone is obsolete, normally tz is NULL. */ if (unlikely(tz != NULL)) { do { - count = read_seqcount_begin(&vdso->tz_seq); + count = raw_read_seqcount_begin(&vdso->tz_seq); tz->tz_minuteswest = vdso->tz_minuteswest; tz->tz_dsttime = vdso->tz_dsttime; } while (unlikely(read_seqcount_retry(&vdso->tz_seq, count))); diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 82733c87d67e..9d171ca4302c 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -18,8 +18,6 @@ /* arch/tile/lib/usercopy.S */ #include <linux/uaccess.h> -EXPORT_SYMBOL(strnlen_user_asm); -EXPORT_SYMBOL(strncpy_from_user_asm); EXPORT_SYMBOL(clear_user_asm); EXPORT_SYMBOL(flush_user_asm); EXPORT_SYMBOL(finv_user_asm); @@ -28,7 +26,6 @@ EXPORT_SYMBOL(finv_user_asm); #include <linux/kernel.h> #include <asm/processor.h> EXPORT_SYMBOL(current_text_addr); -EXPORT_SYMBOL(dump_stack); /* arch/tile/kernel/head.S */ EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index b34f79aada48..88c2a53362e7 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock); void arch_spin_unlock_wait(arch_spinlock_t *lock) { u32 iterations = 0; - while (arch_spin_is_locked(lock)) + int curr = READ_ONCE(lock->current_ticket); + int next = READ_ONCE(lock->next_ticket); + + /* Return immediately if unlocked. */ + if (next == curr) + return; + + /* Wait until the current locker has released the lock. */ + do { delay_backoff(iterations++); + } while (READ_ONCE(lock->current_ticket) == curr); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index d6fb9581e980..c8d1f94ff1fe 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock); void arch_spin_unlock_wait(arch_spinlock_t *lock) { u32 iterations = 0; - while (arch_spin_is_locked(lock)) + u32 val = READ_ONCE(lock->lock); + u32 curr = arch_spin_current(val); + + /* Return immediately if unlocked. */ + if (arch_spin_next(val) == curr) + return; + + /* Wait until the current locker has released the lock. */ + do { delay_backoff(iterations++); + } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S index 1bc162224638..db93ad5fae25 100644 --- a/arch/tile/lib/usercopy_32.S +++ b/arch/tile/lib/usercopy_32.S @@ -20,52 +20,6 @@ /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ /* - * strnlen_user_asm takes the pointer in r0, and the length bound in r1. - * It returns the length, including the terminating NUL, or zero on exception. - * If length is greater than the bound, returns one plus the bound. - */ -STD_ENTRY(strnlen_user_asm) - { bz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */ -1: { lb_u r4, r0; addi r1, r1, -1 } - bz r4, 2f - { bnzt r1, 1b; addi r0, r0, 1 } -2: { sub r0, r0, r3; jrp lr } - STD_ENDPROC(strnlen_user_asm) - .pushsection .fixup,"ax" -strnlen_user_fault: - { move r0, zero; jrp lr } - ENDPROC(strnlen_user_fault) - .section __ex_table,"a" - .align 4 - .word 1b, strnlen_user_fault - .popsection - -/* - * strncpy_from_user_asm takes the kernel target pointer in r0, - * the userspace source pointer in r1, and the length bound (including - * the trailing NUL) in r2. On success, it returns the string length - * (not including the trailing NUL), or -EFAULT on failure. - */ -STD_ENTRY(strncpy_from_user_asm) - { bz r2, 2f; move r3, r0 } -1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } - { sb r0, r4; addi r0, r0, 1 } - bz r4, 2f - bnzt r2, 1b - { sub r0, r0, r3; jrp lr } -2: addi r0, r0, -1 /* don't count the trailing NUL */ - { sub r0, r0, r3; jrp lr } - STD_ENDPROC(strncpy_from_user_asm) - .pushsection .fixup,"ax" -strncpy_from_user_fault: - { movei r0, -EFAULT; jrp lr } - ENDPROC(strncpy_from_user_fault) - .section __ex_table,"a" - .align 4 - .word 1b, strncpy_from_user_fault - .popsection - -/* * clear_user_asm takes the user target address in r0 and the * number of bytes to zero in r1. * It returns the number of uncopiable bytes (hopefully zero) in r0. diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S index b3b31a3306f8..9322dc551e91 100644 --- a/arch/tile/lib/usercopy_64.S +++ b/arch/tile/lib/usercopy_64.S @@ -20,52 +20,6 @@ /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ /* - * strnlen_user_asm takes the pointer in r0, and the length bound in r1. - * It returns the length, including the terminating NUL, or zero on exception. - * If length is greater than the bound, returns one plus the bound. - */ -STD_ENTRY(strnlen_user_asm) - { beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */ -1: { ld1u r4, r0; addi r1, r1, -1 } - beqz r4, 2f - { bnezt r1, 1b; addi r0, r0, 1 } -2: { sub r0, r0, r3; jrp lr } - STD_ENDPROC(strnlen_user_asm) - .pushsection .fixup,"ax" -strnlen_user_fault: - { move r0, zero; jrp lr } - ENDPROC(strnlen_user_fault) - .section __ex_table,"a" - .align 8 - .quad 1b, strnlen_user_fault - .popsection - -/* - * strncpy_from_user_asm takes the kernel target pointer in r0, - * the userspace source pointer in r1, and the length bound (including - * the trailing NUL) in r2. On success, it returns the string length - * (not including the trailing NUL), or -EFAULT on failure. - */ -STD_ENTRY(strncpy_from_user_asm) - { beqz r2, 2f; move r3, r0 } -1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } - { st1 r0, r4; addi r0, r0, 1 } - beqz r4, 2f - bnezt r2, 1b - { sub r0, r0, r3; jrp lr } -2: addi r0, r0, -1 /* don't count the trailing NUL */ - { sub r0, r0, r3; jrp lr } - STD_ENDPROC(strncpy_from_user_asm) - .pushsection .fixup,"ax" -strncpy_from_user_fault: - { movei r0, -EFAULT; jrp lr } - ENDPROC(strncpy_from_user_fault) - .section __ex_table,"a" - .align 8 - .quad 1b, strncpy_from_user_fault - .popsection - -/* * clear_user_asm takes the user target address in r0 and the * number of bytes to zero in r1. * It returns the number of uncopiable bytes (hopefully zero) in r0. diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 3f4f58d34a92..13eac59bf16a 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -699,11 +699,10 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, * interrupt away appropriately and return immediately. We can't do * page faults for user code while in kernel mode. */ -void do_page_fault(struct pt_regs *regs, int fault_num, - unsigned long address, unsigned long write) +static inline void __do_page_fault(struct pt_regs *regs, int fault_num, + unsigned long address, unsigned long write) { int is_page_fault; - enum ctx_state prev_state = exception_enter(); #ifdef CONFIG_KPROBES /* @@ -713,7 +712,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num, */ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, regs->faultnum, SIGSEGV) == NOTIFY_STOP) - goto done; + return; #endif #ifdef __tilegx__ @@ -835,18 +834,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num, async->is_fault = is_page_fault; async->is_write = write; async->address = address; - goto done; + return; } } #endif handle_page_fault(regs, fault_num, is_page_fault, address, write); +} -done: +void do_page_fault(struct pt_regs *regs, int fault_num, + unsigned long address, unsigned long write) +{ + enum ctx_state prev_state = exception_enter(); + __do_page_fault(regs, fault_num, address, write); exception_exit(prev_state); } - #if CHIP_HAS_TILE_DMA() /* * This routine effectively re-issues asynchronous page faults diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 9b90fdc4b151..f6b911cc3923 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -185,9 +185,9 @@ static int hostaudio_open(struct inode *inode, struct file *file) int ret; #ifdef DEBUG - kparam_block_sysfs_write(dsp); + kernel_param_lock(THIS_MODULE); printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp); - kparam_unblock_sysfs_write(dsp); + kernel_param_unlock(THIS_MODULE); #endif state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); @@ -199,11 +199,11 @@ static int hostaudio_open(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) w = 1; - kparam_block_sysfs_write(dsp); + kernel_param_lock(THIS_MODULE); mutex_lock(&hostaudio_mutex); ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); mutex_unlock(&hostaudio_mutex); - kparam_unblock_sysfs_write(dsp); + kernel_param_unlock(THIS_MODULE); if (ret < 0) { kfree(state); @@ -260,17 +260,17 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) w = 1; - kparam_block_sysfs_write(mixer); + kernel_param_lock(THIS_MODULE); mutex_lock(&hostaudio_mutex); ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); mutex_unlock(&hostaudio_mutex); - kparam_unblock_sysfs_write(mixer); + kernel_param_unlock(THIS_MODULE); if (ret < 0) { - kparam_block_sysfs_write(dsp); + kernel_param_lock(THIS_MODULE); printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', " "err = %d\n", dsp, -ret); - kparam_unblock_sysfs_write(dsp); + kernel_param_unlock(THIS_MODULE); kfree(state); return ret; } @@ -326,10 +326,10 @@ MODULE_LICENSE("GPL"); static int __init hostaudio_init_module(void) { - __kernel_param_lock(); + kernel_param_lock(THIS_MODULE); printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", dsp, mixer); - __kernel_param_unlock(); + kernel_param_unlock(THIS_MODULE); module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); if (module_data.dev_audio < 0) { diff --git a/arch/x86/configs/xen.config b/arch/x86/configs/xen.config new file mode 100644 index 000000000000..d9fc7139fd46 --- /dev/null +++ b/arch/x86/configs/xen.config @@ -0,0 +1,28 @@ +# global x86 required specific stuff +# On 32-bit HIGHMEM4G is not allowed +CONFIG_HIGHMEM64G=y +CONFIG_64BIT=y + +# These enable us to allow some of the +# not so generic stuff below +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PCI=y +CONFIG_PCI_MSI=y +CONFIG_X86_MCE=y +CONFIG_ACPI_PROCESSOR=y +CONFIG_CPU_FREQ=y + +# x86 xen specific config options +CONFIG_XEN_PVH=y +CONFIG_XEN_MAX_DOMAIN_MEMORY=500 +CONFIG_XEN_SAVE_RESTORE=y +# CONFIG_XEN_DEBUG_FS is not set +CONFIG_XEN_MCE_LOG=y +CONFIG_XEN_ACPI_PROCESSOR=m +# x86 specific backend drivers +CONFIG_XEN_PCIDEV_BACKEND=m +# x86 specific frontend drivers +CONFIG_XEN_PCIDEV_FRONTEND=m +# depends on MEMORY_HOTPLUG, arm64 doesn't enable this yet, +# move to generic config if it ever does. +CONFIG_XEN_BALLOON_MEMORY_HOTPLUG=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2bfc8a7c88c1..dccad38b59a8 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1537,7 +1537,7 @@ static void __exit aesni_exit(void) crypto_fpu_exit(); } -module_init(aesni_init); +late_initcall(aesni_init); module_exit(aesni_exit); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index a4f62e6f2db2..03d518e499a6 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -297,7 +297,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops audit_param_ops = { +static const struct kernel_param_ops audit_param_ops = { .set = mmu_audit_set, .get = param_get_bool, }; diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 7488cafab955..020c101c255f 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -104,7 +104,7 @@ static int param_set_local64(const char *val, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops param_ops_local64 = { +static const struct kernel_param_ops param_ops_local64 = { .get = param_get_local64, .set = param_set_local64, }; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 55b6f15dac90..dda653ce7b24 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -326,8 +326,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, goto out_put_request; } - ret = -EFAULT; - if (blk_fill_sghdr_rq(q, rq, hdr, mode)) + ret = blk_fill_sghdr_rq(q, rq, hdr, mode); + if (ret < 0) goto out_free_cdb; ret = 0; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index fcb7807ea8b7..10561ce16ed1 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -660,8 +660,10 @@ static int add_region_before(u64 start, u64 end, u8 space_id, return -ENOMEM; error = request_range(start, end, space_id, flags, desc); - if (error) + if (error) { + kfree(reg); return error; + } reg->start = start; reg->end = end; diff --git a/drivers/base/property.c b/drivers/base/property.c index e645852396ba..f3f6d167f3f1 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -129,9 +129,9 @@ EXPORT_SYMBOL_GPL(device_property_present); bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname) { if (is_of_node(fwnode)) - return of_property_read_bool(of_node(fwnode), propname); + return of_property_read_bool(to_of_node(fwnode), propname); else if (is_acpi_node(fwnode)) - return !acpi_dev_prop_get(acpi_node(fwnode), propname, NULL); + return !acpi_dev_prop_get(to_acpi_node(fwnode), propname, NULL); return !!pset_prop_get(to_pset(fwnode), propname); } @@ -286,10 +286,10 @@ EXPORT_SYMBOL_GPL(device_property_read_string); ({ \ int _ret_; \ if (is_of_node(_fwnode_)) \ - _ret_ = OF_DEV_PROP_READ_ARRAY(of_node(_fwnode_), _propname_, \ + _ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_, \ _type_, _val_, _nval_); \ else if (is_acpi_node(_fwnode_)) \ - _ret_ = acpi_dev_prop_read(acpi_node(_fwnode_), _propname_, \ + _ret_ = acpi_dev_prop_read(to_acpi_node(_fwnode_), _propname_, \ _proptype_, _val_, _nval_); \ else \ _ret_ = pset_prop_read_array(to_pset(_fwnode_), _propname_, \ @@ -425,11 +425,11 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode, { if (is_of_node(fwnode)) return val ? - of_property_read_string_array(of_node(fwnode), propname, - val, nval) : - of_property_count_strings(of_node(fwnode), propname); + of_property_read_string_array(to_of_node(fwnode), + propname, val, nval) : + of_property_count_strings(to_of_node(fwnode), propname); else if (is_acpi_node(fwnode)) - return acpi_dev_prop_read(acpi_node(fwnode), propname, + return acpi_dev_prop_read(to_acpi_node(fwnode), propname, DEV_PROP_STRING, val, nval); return pset_prop_read_array(to_pset(fwnode), propname, @@ -456,9 +456,9 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode, const char *propname, const char **val) { if (is_of_node(fwnode)) - return of_property_read_string(of_node(fwnode), propname, val); + return of_property_read_string(to_of_node(fwnode), propname, val); else if (is_acpi_node(fwnode)) - return acpi_dev_prop_read(acpi_node(fwnode), propname, + return acpi_dev_prop_read(to_acpi_node(fwnode), propname, DEV_PROP_STRING, val, 1); return -ENXIO; @@ -476,13 +476,13 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, if (IS_ENABLED(CONFIG_OF) && dev->of_node) { struct device_node *node; - node = of_get_next_available_child(dev->of_node, of_node(child)); + node = of_get_next_available_child(dev->of_node, to_of_node(child)); if (node) return &node->fwnode; } else if (IS_ENABLED(CONFIG_ACPI)) { struct acpi_device *node; - node = acpi_get_next_child(dev, acpi_node(child)); + node = acpi_get_next_child(dev, to_acpi_node(child)); if (node) return acpi_fwnode_handle(node); } @@ -501,7 +501,7 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node); void fwnode_handle_put(struct fwnode_handle *fwnode) { if (is_of_node(fwnode)) - of_node_put(of_node(fwnode)); + of_node_put(to_of_node(fwnode)); } EXPORT_SYMBOL_GPL(fwnode_handle_put); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 6f9b7534928e..69de41a87b74 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -99,7 +99,7 @@ static int null_set_queue_mode(const char *str, const struct kernel_param *kp) return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ); } -static struct kernel_param_ops null_queue_mode_param_ops = { +static const struct kernel_param_ops null_queue_mode_param_ops = { .set = null_set_queue_mode, .get = param_get_int, }; @@ -127,7 +127,7 @@ static int null_set_irqmode(const char *str, const struct kernel_param *kp) NULL_IRQ_TIMER); } -static struct kernel_param_ops null_irqmode_param_ops = { +static const struct kernel_param_ops null_irqmode_param_ops = { .set = null_set_irqmode, .get = param_get_int, }; diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e5112714188f..34338d7438f5 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -193,6 +193,13 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } +static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + nvmeq->tags = NULL; +} + static int nvme_admin_init_request(void *data, struct request *req, unsigned int hctx_idx, unsigned int rq_idx, unsigned int numa_node) @@ -606,7 +613,10 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, return; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - req->errors = status; + if (cmd_rq->ctx == CMD_CTX_CANCELLED) + req->errors = -EINTR; + else + req->errors = status; } else { req->errors = nvme_error_status(status); } @@ -1161,12 +1171,13 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) { - struct nvme_command c = { - .identify.opcode = nvme_admin_identify, - .identify.cns = cpu_to_le32(1), - }; + struct nvme_command c = { }; int error; + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify; + c.identify.cns = cpu_to_le32(1); + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) return -ENOMEM; @@ -1181,12 +1192,13 @@ int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, struct nvme_id_ns **id) { - struct nvme_command c = { - .identify.opcode = nvme_admin_identify, - .identify.nsid = cpu_to_le32(nsid), - }; + struct nvme_command c = { }; int error; + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify, + c.identify.nsid = cpu_to_le32(nsid), + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); if (!*id) return -ENOMEM; @@ -1230,14 +1242,14 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) { - struct nvme_command c = { - .common.opcode = nvme_admin_get_log_page, - .common.nsid = cpu_to_le32(0xFFFFFFFF), - .common.cdw10[0] = cpu_to_le32( + struct nvme_command c = { }; + int error; + + c.common.opcode = nvme_admin_get_log_page, + c.common.nsid = cpu_to_le32(0xFFFFFFFF), + c.common.cdw10[0] = cpu_to_le32( (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | NVME_LOG_SMART), - }; - int error; *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); if (!*log) @@ -1606,6 +1618,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, + .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; @@ -1648,6 +1661,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) } if (!blk_get_queue(dev->admin_q)) { nvme_dev_remove_admin(dev); + dev->admin_q = NULL; return -ENODEV; } } else @@ -2349,19 +2363,20 @@ static int nvme_dev_add(struct nvme_dev *dev) } kfree(ctrl); - dev->tagset.ops = &nvme_mq_ops; - dev->tagset.nr_hw_queues = dev->online_queues - 1; - dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(dev->dev); - dev->tagset.queue_depth = + if (!dev->tagset.tags) { + dev->tagset.ops = &nvme_mq_ops; + dev->tagset.nr_hw_queues = dev->online_queues - 1; + dev->tagset.timeout = NVME_IO_TIMEOUT; + dev->tagset.numa_node = dev_to_node(dev->dev); + dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - dev->tagset.cmd_size = nvme_cmd_size(dev); - dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; - dev->tagset.driver_data = dev; - - if (blk_mq_alloc_tag_set(&dev->tagset)) - return 0; + dev->tagset.cmd_size = nvme_cmd_size(dev); + dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tagset.driver_data = dev; + if (blk_mq_alloc_tag_set(&dev->tagset)) + return 0; + } schedule_work(&dev->scan_work); return 0; } @@ -2734,8 +2749,10 @@ static void nvme_free_dev(struct kref *kref) put_device(dev->device); nvme_free_namespaces(dev); nvme_release_instance(dev); - blk_mq_free_tag_set(&dev->tagset); - blk_put_queue(dev->admin_q); + if (dev->tagset.tags) + blk_mq_free_tag_set(&dev->tagset); + if (dev->admin_q) + blk_put_queue(dev->admin_q); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2866,6 +2883,9 @@ static int nvme_dev_start(struct nvme_dev *dev) free_tags: nvme_dev_remove_admin(dev); + blk_put_queue(dev->admin_q); + dev->admin_q = NULL; + dev->queues[0]->tags = NULL; disable: nvme_disable_queue(dev, 0); nvme_dev_list_remove(dev); @@ -2907,25 +2927,43 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); - schedule_work(&dev->scan_work); + nvme_dev_add(dev); nvme_set_irq_hints(dev); } return 0; } +static void nvme_dead_ctrl(struct nvme_dev *dev) +{ + dev_warn(dev->dev, "Device failed to resume\n"); + kref_get(&dev->kref); + if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", + dev->instance))) { + dev_err(dev->dev, + "Failed to start controller remove task\n"); + kref_put(&dev->kref, nvme_free_dev); + } +} + static void nvme_dev_reset(struct nvme_dev *dev) { + bool in_probe = work_busy(&dev->probe_work); + nvme_dev_shutdown(dev); - if (nvme_dev_resume(dev)) { - dev_warn(dev->dev, "Device failed to resume\n"); - kref_get(&dev->kref); - if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", - dev->instance))) { - dev_err(dev->dev, - "Failed to start controller remove task\n"); - kref_put(&dev->kref, nvme_free_dev); - } + + /* Synchronize with device probe so that work will see failure status + * and exit gracefully without trying to schedule another reset */ + flush_work(&dev->probe_work); + + /* Fail this device if reset occured during probe to avoid + * infinite initialization loops. */ + if (in_probe) { + nvme_dead_ctrl(dev); + return; } + /* Schedule device resume asynchronously so the reset work is available + * to cleanup errors that may occur during reinitialization */ + schedule_work(&dev->probe_work); } static void nvme_reset_failed_dev(struct work_struct *ws) @@ -2957,6 +2995,7 @@ static int nvme_reset(struct nvme_dev *dev) if (!ret) { flush_work(&dev->reset_work); + flush_work(&dev->probe_work); return 0; } @@ -3053,26 +3092,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_async_probe(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); - int result; - result = nvme_dev_start(dev); - if (result) - goto reset; - - if (dev->online_queues > 1) - result = nvme_dev_add(dev); - if (result) - goto reset; - - nvme_set_irq_hints(dev); - return; - reset: - spin_lock(&dev_list_lock); - if (!work_busy(&dev->reset_work)) { - dev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &dev->reset_work); - } - spin_unlock(&dev_list_lock); + if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work)) + nvme_dead_ctrl(dev); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -3104,8 +3126,8 @@ static void nvme_remove(struct pci_dev *pdev) flush_work(&dev->reset_work); flush_work(&dev->scan_work); device_remove_file(dev->device, &dev_attr_reset_controller); - nvme_dev_shutdown(dev); nvme_dev_remove(dev); + nvme_dev_shutdown(dev); nvme_dev_remove_admin(dev); device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); nvme_free_queues(dev, 0); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 713fc9ff1149..ced96777b677 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); /* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +/* * The LRU mechanism to clean the lists of persistent grants needs to * be executed periodically. The time interval between consecutive executions * of the purge mechanism is set in ms. @@ -729,7 +736,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req) struct grant_page **pages = req->segments; unsigned int invcount; - invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages, + invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs, req->unmap, req->unmap_pages); work->data = req; @@ -915,7 +922,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req) int rc; rc = xen_blkbk_map(pending_req->blkif, pending_req->segments, - pending_req->nr_pages, + pending_req->nr_segs, (pending_req->operation != BLKIF_OP_READ)); return rc; @@ -931,7 +938,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, int indirect_grefs, rc, n, nseg, i; struct blkif_request_segment *segments = NULL; - nseg = pending_req->nr_pages; + nseg = pending_req->nr_segs; indirect_grefs = INDIRECT_PAGES(nseg); BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); @@ -1251,7 +1258,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, pending_req->id = req->u.rw.id; pending_req->operation = req_operation; pending_req->status = BLKIF_RSP_OKAY; - pending_req->nr_pages = nseg; + pending_req->nr_segs = nseg; if (req->operation != BLKIF_OP_INDIRECT) { preq.dev = req->u.rw.handle; @@ -1372,7 +1379,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, fail_flush: xen_blkbk_unmap(blkif, pending_req->segments, - pending_req->nr_pages); + pending_req->nr_segs); fail_response: /* Haven't submitted any bio's yet. */ make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR); @@ -1438,6 +1445,12 @@ static int __init xen_blkif_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; + } + rc = xen_blkif_interface_init(); if (rc) goto failed_init; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index f620b5d3f77c..45a044a53d1e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -44,6 +44,7 @@ #include <xen/interface/io/blkif.h> #include <xen/interface/io/protocols.h> +extern unsigned int xen_blkif_max_ring_order; /* * This is the maximum number of segments that would be allowed in indirect * requests. This value will also be passed to the frontend. @@ -248,7 +249,7 @@ struct backend_info; #define PERSISTENT_GNT_WAS_ACTIVE 1 /* Number of requests that we can fit in a ring */ -#define XEN_BLKIF_REQS 32 +#define XEN_BLKIF_REQS_PER_PAGE 32 struct persistent_gnt { struct page *page; @@ -320,6 +321,7 @@ struct xen_blkif { struct work_struct free_work; /* Thread shutdown wait queue. */ wait_queue_head_t shutdown_wq; + unsigned int nr_ring_pages; }; struct seg_buf { @@ -343,7 +345,7 @@ struct grant_page { struct pending_req { struct xen_blkif *blkif; u64 id; - int nr_pages; + int nr_segs; atomic_t pendcnt; unsigned short operation; int status; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6ab69ad61ee1..deb3f001791f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -25,6 +25,7 @@ /* Enlarge the array size in order to fully show blkback name. */ #define BLKBACK_NAME_LEN (20) +#define RINGREF_NAME_LEN (20) struct backend_info { struct xenbus_device *dev; @@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; - struct pending_req *req, *n; - int i, j; BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); @@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) INIT_LIST_HEAD(&blkif->pending_free); INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); - - for (i = 0; i < XEN_BLKIF_REQS; i++) { - req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - goto fail; - list_add_tail(&req->free_list, - &blkif->pending_free); - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { - req->segments[j] = kzalloc(sizeof(*req->segments[0]), - GFP_KERNEL); - if (!req->segments[j]) - goto fail; - } - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { - req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), - GFP_KERNEL); - if (!req->indirect_pages[j]) - goto fail; - } - } spin_lock_init(&blkif->pending_free_lock); init_waitqueue_head(&blkif->pending_free_wq); init_waitqueue_head(&blkif->shutdown_wq); return blkif; - -fail: - list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { - list_del(&req->free_list); - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { - if (!req->segments[j]) - break; - kfree(req->segments[j]); - } - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { - if (!req->indirect_pages[j]) - break; - kfree(req->indirect_pages[j]); - } - kfree(req); - } - - kmem_cache_free(xen_blkif_cachep, blkif); - - return ERR_PTR(-ENOMEM); } -static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, + unsigned int nr_grefs, unsigned int evtchn) { int err; @@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, + err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, &blkif->blk_ring); if (err < 0) return err; @@ -217,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs); break; } default: @@ -312,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) i++; } - WARN_ON(i != XEN_BLKIF_REQS); + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); kmem_cache_free(xen_blkif_cachep, blkif); } @@ -597,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u", + xen_blkif_max_ring_order); + if (err) + pr_warn("%s write out 'max-ring-page-order' failed\n", __func__); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -860,22 +824,66 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; - unsigned int evtchn; + unsigned int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int evtchn, nr_grefs, ring_page_order; unsigned int pers_grants; char protocol[64] = ""; - int err; + struct pending_req *req, *n; + int err, i, j; pr_debug("%s %s\n", __func__, dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + pr_info("event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_page_order); + if (err != 1) { + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%u", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + nr_grefs = 1; + pr_info("%s:using single page: ring-ref %d\n", dev->otherend, + ring_ref[0]); + } else { + unsigned int i; + + if (ring_page_order > xen_blkif_max_ring_order) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", + dev->otherend, ring_page_order, + xen_blkif_max_ring_order); + return err; + } + + nr_grefs = 1 << ring_page_order; + for (i = 0; i < nr_grefs; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name, + "%u", &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/%s", + dev->otherend, ring_ref_name); + return err; + } + pr_info("ring-ref%u: %u\n", i, ring_ref[i]); + } + } be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", @@ -900,20 +908,55 @@ static int connect_ring(struct backend_info *be) be->blkif->vbd.feature_gnt_persistent = pers_grants; be->blkif->vbd.overflow_max_grants = 0; + be->blkif->nr_ring_pages = nr_grefs; - pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n", + nr_grefs, evtchn, be->blkif->blk_protocol, protocol, pers_grants ? "persistent grants" : ""); + for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + goto fail; + list_add_tail(&req->free_list, &be->blkif->pending_free); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL); + if (!req->segments[j]) + goto fail; + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), + GFP_KERNEL); + if (!req->indirect_pages[j]) + goto fail; + } + } + /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); return err; } return 0; + +fail: + list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) { + list_del(&req->free_list); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + if (!req->segments[j]) + break; + kfree(req->segments[j]); + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + if (!req->indirect_pages[j]) + break; + kfree(req->indirect_pages[j]); + } + kfree(req); + } + return -ENOMEM; } static const struct xenbus_device_id xen_blkbk_ids[] = { diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c61cf8c6f61..6d89ed35d80c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32; module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +/* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +static unsigned int xen_blkif_max_ring_order; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); + +#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) +#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) +/* + * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 + * characters are enough. Define to 20 to keep consist with backend. + */ +#define RINGREF_NAME_LEN (20) /* * We have one of these per vbd, whether ide, scsi or 'other'. They @@ -114,13 +128,14 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int nr_ring_pages; struct blkif_front_ring ring; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; struct list_head grants; struct list_head indirect_pages; unsigned int persistent_gnts_c; @@ -139,8 +154,6 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 @@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_RING_SIZE(info)); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* * Clear persistent grants present in requests already * on the shared ring @@ -1033,12 +1046,15 @@ free_shadow: flush_work(&info->work); /* Free resources associated with old device channel. */ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < info->nr_ring_pages; i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; @@ -1058,12 +1074,6 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { - /* - * Copy the data received from the backend into the bvec. - * Since bv_offset can be different than 0, and bv_len different - * than PAGE_SIZE, we have to keep track of the current offset, - * to be sure we are copying the data from the right shared page. - */ for_each_sg(s->sg, sg, nseg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); shared_data = kmap_atomic( @@ -1157,7 +1167,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. */ - if (id >= BLK_RING_SIZE) { + if (id >= BLK_RING_SIZE(info)) { WARN(1, "%s: response to %s has incorrect id (%ld)\n", info->gd->disk_name, op_name(bret->operation), id); /* We can't safely get the 'struct request' as @@ -1245,26 +1255,30 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; - grant_ref_t gref; - int err; + int err, i; + unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; + grant_ref_t gref[XENBUS_MAX_RING_PAGES]; - info->ring_ref = GRANT_INVALID_REF; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + get_order(ring_size)); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, ring_size); - err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref); + err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, get_order(ring_size)); info->ring.sring = NULL; goto fail; } - info->ring_ref = gref; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = gref[i]; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -1292,7 +1306,18 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; - int err; + int err, i; + unsigned int max_page_order = 0; + unsigned int ring_page_order = 0; + + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "max-ring-page-order", "%u", &max_page_order); + if (err != 1) + info->nr_ring_pages = 1; + else { + ring_page_order = min(xen_blkif_max_ring_order, max_page_order); + info->nr_ring_pages = 1 << ring_page_order; + } /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); @@ -1306,11 +1331,32 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (info->nr_ring_pages == 1) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", ring_page_order); + if (err) { + message = "writing ring-page-order"; + goto abort_transaction; + } + + for (i = 0; i < info->nr_ring_pages; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_printf(xbt, dev->nodename, ring_ref_name, + "%u", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } } err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); @@ -1338,6 +1384,9 @@ again: goto destroy_blkring; } + for (i = 0; i < BLK_RING_SIZE(info); i++) + info->shadow[i].req.u.rw.id = i+1; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; xenbus_switch_state(dev, XenbusStateInitialised); return 0; @@ -1361,7 +1410,7 @@ again: static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err, vdevice, i; + int err, vdevice; struct blkfront_info *info; /* FIXME: Use dynamic device id if this is not set. */ @@ -1422,21 +1471,10 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_blkback(dev, info); - if (err) { - kfree(info); - dev_set_drvdata(&dev->dev, NULL); - return err; - } - return 0; } @@ -1476,10 +1514,10 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_RING_SIZE(info); i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; rc = blkfront_setup_indirect(info); if (rc) { @@ -1491,7 +1529,7 @@ static int blkif_recover(struct blkfront_info *info) blk_queue_max_segments(info->rq, segs); bio_list_init(&bio_list); INIT_LIST_HEAD(&requests); - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1697,7 +1735,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) segs = info->max_indirect_segments; } - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); if (err) goto out_of_memory; @@ -1707,7 +1745,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) * grants, we need to allocate a set of pages that can be * used for mapping indirect grefs */ - int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE; + int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&info->indirect_pages)); for (i = 0; i < num; i++) { @@ -1718,7 +1756,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { info->shadow[i].grants_used = kzalloc( sizeof(info->shadow[i].grants_used[0]) * segs, GFP_NOIO); @@ -1740,7 +1778,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) return 0; out_of_memory: - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { kfree(info->shadow[i].grants_used); info->shadow[i].grants_used = NULL; kfree(info->shadow[i].sg); @@ -1906,8 +1944,15 @@ static void blkback_changed(struct xenbus_device *dev, dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); switch (backend_state) { - case XenbusStateInitialising: case XenbusStateInitWait: + if (dev->state != XenbusStateInitialising) + break; + if (talk_to_blkback(dev, info)) { + kfree(info); + dev_set_drvdata(&dev->dev, NULL); + break; + } + case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: @@ -2091,6 +2136,12 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = 0; + } + if (!xen_has_pv_disk_devices()) return -ENODEV; diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 37b8be7cba95..0ac3bd1a5497 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -208,7 +208,7 @@ static int set_param_timeout(const char *val, const struct kernel_param *kp) return rv; } -static struct kernel_param_ops param_ops_timeout = { +static const struct kernel_param_ops param_ops_timeout = { .set = set_param_timeout, .get = param_get_int, }; @@ -270,14 +270,14 @@ static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops param_ops_wdog_ifnum = { +static const struct kernel_param_ops param_ops_wdog_ifnum = { .set = set_param_wdog_ifnum, .get = param_get_int, }; #define param_check_wdog_ifnum param_check_int -static struct kernel_param_ops param_ops_str = { +static const struct kernel_param_ops param_ops_str = { .set = set_param_str, .get = get_param_str, }; diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 935b05936dbd..9064ff743598 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -462,15 +462,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); if (mct_int_type == MCT_INT_SPI) { - evt->irq = mct_irqs[MCT_L0_IRQ + cpu]; - if (request_irq(evt->irq, exynos4_mct_tick_isr, - IRQF_TIMER | IRQF_NOBALANCING, - evt->name, mevt)) { - pr_err("exynos-mct: cannot register IRQ %d\n", - evt->irq); + + if (evt->irq == -1) return -EIO; - } - irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu)); + + irq_force_affinity(evt->irq, cpumask_of(cpu)); + enable_irq(evt->irq); } else { enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0); } @@ -483,10 +480,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) static void exynos4_local_timer_stop(struct clock_event_device *evt) { evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); - if (mct_int_type == MCT_INT_SPI) - free_irq(evt->irq, this_cpu_ptr(&percpu_mct_tick)); - else + if (mct_int_type == MCT_INT_SPI) { + if (evt->irq != -1) + disable_irq_nosync(evt->irq); + } else { disable_percpu_irq(mct_irqs[MCT_L0_IRQ]); + } } static int exynos4_mct_cpu_notify(struct notifier_block *self, @@ -518,7 +517,7 @@ static struct notifier_block exynos4_mct_cpu_nb = { static void __init exynos4_timer_resources(struct device_node *np, void __iomem *base) { - int err; + int err, cpu; struct mct_clock_event_device *mevt = this_cpu_ptr(&percpu_mct_tick); struct clk *mct_clk, *tick_clk; @@ -545,7 +544,25 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem WARN(err, "MCT: can't request IRQ %d (%d)\n", mct_irqs[MCT_L0_IRQ], err); } else { - irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0)); + for_each_possible_cpu(cpu) { + int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + struct mct_clock_event_device *pcpu_mevt = + per_cpu_ptr(&percpu_mct_tick, cpu); + + pcpu_mevt->evt.irq = -1; + + irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); + if (request_irq(mct_irq, + exynos4_mct_tick_isr, + IRQF_TIMER | IRQF_NOBALANCING, + pcpu_mevt->name, pcpu_mevt)) { + pr_err("exynos-mct: cannot register IRQ (cpu%d)\n", + cpu); + + continue; + } + pcpu_mevt->evt.irq = mct_irq; + } } err = register_cpu_notifier(&exynos4_mct_cpu_nb); diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 1e3ef5ec4784..845bafcfa792 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -67,6 +67,8 @@ static int nap_loop(struct cpuidle_device *dev, return index; } +/* Register for fastsleep only in oneshot mode of broadcast */ +#ifdef CONFIG_TICK_ONESHOT static int fastsleep_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -90,7 +92,7 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; } - +#endif /* * States for dedicated partition case. */ @@ -216,7 +218,14 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; powernv_states[nr_idle_states].enter = &nap_loop; - } else if (flags[i] & OPAL_PM_SLEEP_ENABLED || + } + + /* + * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come + * within this config dependency check. + */ +#ifdef CONFIG_TICK_ONESHOT + if (flags[i] & OPAL_PM_SLEEP_ENABLED || flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { /* Add FASTSLEEP state */ strcpy(powernv_states[nr_idle_states].name, "FastSleep"); @@ -225,7 +234,7 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].target_residency = 300000; powernv_states[nr_idle_states].enter = &fastsleep_loop; } - +#endif powernv_states[nr_idle_states].exit_latency = ((unsigned int)latency_ns[i]) / 1000; diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c index 7f8b66c915ed..fdda8e7ae302 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_engine.c +++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c @@ -88,10 +88,7 @@ void adf_ae_fw_release(struct adf_accel_dev *accel_dev) qat_uclo_del_uof_obj(loader_data->fw_loader); qat_hal_deinit(loader_data->fw_loader); - - if (loader_data->uof_fw) - release_firmware(loader_data->uof_fw); - + release_firmware(loader_data->uof_fw); loader_data->uof_fw = NULL; loader_data->fw_loader = NULL; } diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index ccec327489da..db2926bff8a5 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c @@ -449,7 +449,7 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, err: for (i = 0; i < ADF_ETR_MAX_RINGS_PER_BANK; i++) { ring = &bank->rings[i]; - if (hw_data->tx_rings_mask & (1 << i) && ring->inflights) + if (hw_data->tx_rings_mask & (1 << i)) kfree(ring->inflights); } return -ENOMEM; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 220ee49633e4..b8576fd6bd0e 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -120,7 +120,7 @@ static struct dmatest_info { static int dmatest_run_set(const char *val, const struct kernel_param *kp); static int dmatest_run_get(char *val, const struct kernel_param *kp); -static struct kernel_param_ops run_ops = { +static const struct kernel_param_ops run_ops = { .set = dmatest_run_set, .get = dmatest_run_get, }; @@ -195,7 +195,7 @@ static int dmatest_wait_get(char *val, const struct kernel_param *kp) return param_get_bool(val, kp); } -static struct kernel_param_ops wait_ops = { +static const struct kernel_param_ops wait_ops = { .get = dmatest_wait_get, .set = param_set_bool, }; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 8333f878919c..40343fa92c7b 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -657,8 +657,9 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev) } for (i = 0; i < kona_gpio->num_bank; i++) { bank = &kona_gpio->banks[i]; - irq_set_chained_handler(bank->irq, bcm_kona_gpio_irq_handler); - irq_set_handler_data(bank->irq, bank); + irq_set_chained_handler_and_data(bank->irq, + bcm_kona_gpio_irq_handler, + bank); } spin_lock_init(&kona_gpio->lock); diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index 58faf04fce5d..55fa9853a7f2 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -348,8 +348,8 @@ static void dwapb_configure_irqs(struct dwapb_gpio *gpio, irq_gc->chip_types[1].handler = handle_edge_irq; if (!pp->irq_shared) { - irq_set_chained_handler(pp->irq, dwapb_irq_handler); - irq_set_handler_data(pp->irq, gpio); + irq_set_chained_handler_and_data(pp->irq, dwapb_irq_handler, + gpio); } else { /* * Request a shared IRQ since where MFD would have devices diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c index 01acf0a8cdb1..7bcfb87a5fa6 100644 --- a/drivers/gpio/gpio-msic.c +++ b/drivers/gpio/gpio-msic.c @@ -309,8 +309,7 @@ static int platform_msic_gpio_probe(struct platform_device *pdev) &msic_irqchip, handle_simple_irq); } - irq_set_chained_handler(mg->irq, msic_gpio_irq_handler); - irq_set_handler_data(mg->irq, mg); + irq_set_chained_handler_and_data(mg->irq, msic_gpio_irq_handler, mg); return 0; err: diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index be42ab368a80..bf4bd1d120c3 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2052,14 +2052,14 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, if (is_of_node(fwnode)) { enum of_gpio_flags flags; - desc = of_get_named_gpiod_flags(of_node(fwnode), propname, 0, + desc = of_get_named_gpiod_flags(to_of_node(fwnode), propname, 0, &flags); if (!IS_ERR(desc)) active_low = flags & OF_GPIO_ACTIVE_LOW; } else if (is_acpi_node(fwnode)) { struct acpi_gpio_info info; - desc = acpi_get_gpiod_by_index(acpi_node(fwnode), propname, 0, + desc = acpi_get_gpiod_by_index(to_acpi_node(fwnode), propname, 0, &info); if (!IS_ERR(desc)) active_low = info.active_low; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index e29b02ca9e91..f086ef387475 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -199,7 +199,7 @@ static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops param_ops_ide_dev_mask = { +static const struct kernel_param_ops param_ops_ide_dev_mask = { .set = ide_set_dev_param_mask }; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index eada8f758ad4..267dc4f75502 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -99,7 +99,7 @@ module_param(register_always, bool, 0444); MODULE_PARM_DESC(register_always, "Use memory registration even for contiguous memory regions"); -static struct kernel_param_ops srp_tmo_ops; +static const struct kernel_param_ops srp_tmo_ops; static int srp_reconnect_delay = 10; module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, @@ -184,7 +184,7 @@ out: return res; } -static struct kernel_param_ops srp_tmo_ops = { +static const struct kernel_param_ops srp_tmo_ops = { .get = srp_tmo_get, .set = srp_tmo_set, }; diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index f63341f20b91..cfd58e87da26 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -94,7 +94,7 @@ static int ati_remote2_get_mode_mask(char *buffer, static unsigned int channel_mask = ATI_REMOTE2_MAX_CHANNEL_MASK; #define param_check_channel_mask(name, p) __param_check(name, p, unsigned int) -static struct kernel_param_ops param_ops_channel_mask = { +static const struct kernel_param_ops param_ops_channel_mask = { .set = ati_remote2_set_channel_mask, .get = ati_remote2_get_channel_mask, }; @@ -103,7 +103,7 @@ MODULE_PARM_DESC(channel_mask, "Bitmask of channels to accept <15:Channel16>...< static unsigned int mode_mask = ATI_REMOTE2_MAX_MODE_MASK; #define param_check_mode_mask(name, p) __param_check(name, p, unsigned int) -static struct kernel_param_ops param_ops_mode_mask = { +static const struct kernel_param_ops param_ops_mode_mask = { .set = ati_remote2_set_mode_mask, .get = ati_remote2_get_mode_mask, }; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 7c4ba43d253e..ec3477036150 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -47,7 +47,7 @@ MODULE_LICENSE("GPL"); static unsigned int psmouse_max_proto = PSMOUSE_AUTO; static int psmouse_set_maxproto(const char *val, const struct kernel_param *); static int psmouse_get_maxproto(char *buffer, const struct kernel_param *kp); -static struct kernel_param_ops param_ops_proto_abbrev = { +static const struct kernel_param_ops param_ops_proto_abbrev = { .set = psmouse_set_maxproto, .get = psmouse_get_maxproto, }; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d3e5e9abe3b6..a57e9b749895 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -117,6 +117,7 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int alloc_passthrough_domain(void); +static int protection_domain_init(struct protection_domain *domain); /**************************************************************************** * @@ -1881,12 +1882,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom) return NULL; - spin_lock_init(&dma_dom->domain.lock); - - dma_dom->domain.id = domain_id_alloc(); - if (dma_dom->domain.id == 0) + if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; - INIT_LIST_HEAD(&dma_dom->domain.dev_list); + dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -2916,6 +2914,18 @@ static void protection_domain_free(struct protection_domain *domain) kfree(domain); } +static int protection_domain_init(struct protection_domain *domain) +{ + spin_lock_init(&domain->lock); + mutex_init(&domain->api_lock); + domain->id = domain_id_alloc(); + if (!domain->id) + return -ENOMEM; + INIT_LIST_HEAD(&domain->dev_list); + + return 0; +} + static struct protection_domain *protection_domain_alloc(void) { struct protection_domain *domain; @@ -2924,12 +2934,8 @@ static struct protection_domain *protection_domain_alloc(void) if (!domain) return NULL; - spin_lock_init(&domain->lock); - mutex_init(&domain->api_lock); - domain->id = domain_id_alloc(); - if (!domain->id) + if (protection_domain_init(domain)) goto out_err; - INIT_LIST_HEAD(&domain->dev_list); add_domain_to_list(domain); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f14130121298..8e9ec81ce4bb 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1389,8 +1389,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->pgtbl_ops) - free_io_pgtable_ops(smmu_domain->pgtbl_ops); + free_io_pgtable_ops(smmu_domain->pgtbl_ops); /* Free the CD and ASID, if we allocated them */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index dce041b1c139..4cd0c29cb585 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1566,7 +1566,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) { + if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 49e7542510d1..f286090931cc 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -847,13 +847,24 @@ static int add_iommu_group(struct device *dev, void *data) { struct iommu_callback_data *cb = data; const struct iommu_ops *ops = cb->ops; + int ret; if (!ops->add_device) return 0; WARN_ON(dev->iommu_group); - return ops->add_device(dev); + ret = ops->add_device(dev); + + /* + * We ignore -ENODEV errors for now, as they just mean that the + * device is not translated by an IOMMU. We still care about + * other errors and fail to initialize when they happen. + */ + if (ret == -ENODEV) + ret = 0; + + return ret; } static int remove_iommu_group(struct device *dev, void *data) diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 15eb3f86f670..d2d54d62afee 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -191,7 +191,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev) goto err; } - np = of_node(child); + np = to_of_node(child); if (fwnode_property_present(child, "label")) { fwnode_property_read_string(child, "label", &led.name); diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 977bd3a3eed0..120df5c08741 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -417,9 +417,8 @@ static int __init asic3_irq_probe(struct platform_device *pdev) asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK), ASIC3_INTMASK_GINTMASK); - irq_set_chained_handler(asic->irq_nr, asic3_irq_demux); + irq_set_chained_handler_and_data(asic->irq_nr, asic3_irq_demux, asic); irq_set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING); - irq_set_handler_data(asic->irq_nr, asic); return 0; } diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 4739689d23ad..fb8705fc3aca 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -115,7 +115,7 @@ static int param_set_axis(const char *val, const struct kernel_param *kp) return ret; } -static struct kernel_param_ops param_ops_axis = { +static const struct kernel_param_ops param_ops_axis = { .set = param_set_axis, .get = param_get_int, }; diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 1a92d30689e7..ebf46ad2d513 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -162,7 +162,7 @@ static int __init ubiblock_set_param(const char *val, return 0; } -static struct kernel_param_ops ubiblock_param_ops = { +static const struct kernel_param_ops ubiblock_param_ops = { .set = ubiblock_set_param, }; module_param_cb(block, &ubiblock_param_ops, NULL, 0); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index dd03ad865caf..661cdaa7ea96 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -268,7 +268,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata, int ret; /* Try to obtain pages, decreasing order if necessary */ - gfp |= __GFP_COLD | __GFP_COMP; + gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; while (order >= 0) { pages = alloc_pages(gfp, order); if (pages) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 95153b234c71..299eb4315fe6 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -948,7 +948,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) struct resource *res; void __iomem *base_addr; u32 offset; - int ret; + int ret = 0; pdev = pdata->pdev; dev = &pdev->dev; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 7a4aaa3c01b6..cd4ae76bbff2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -530,7 +530,6 @@ enum bnx2x_tpa_mode_t { struct bnx2x_alloc_pool { struct page *page; - dma_addr_t dma; unsigned int offset; }; @@ -2418,10 +2417,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR | \ AEU_INPUTS_ATTN_BITS_MISC_PARITY_ERROR) -#define HW_PRTY_ASSERT_SET_3 (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \ - AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \ - AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY | \ - AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY) +#define HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD \ + (AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY | \ + AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY | \ + AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY) + +#define HW_PRTY_ASSERT_SET_3 (HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD | \ + AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY) #define HW_PRTY_ASSERT_SET_4 (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR | \ AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e2a65334708d..a90d7364334f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -563,23 +563,20 @@ static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp, return -ENOMEM; } - pool->dma = dma_map_page(&bp->pdev->dev, pool->page, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&bp->pdev->dev, - pool->dma))) { - __free_pages(pool->page, PAGES_PER_SGE_SHIFT); - pool->page = NULL; - BNX2X_ERR("Can't map sge\n"); - return -ENOMEM; - } pool->offset = 0; } + mapping = dma_map_page(&bp->pdev->dev, pool->page, + pool->offset, SGE_PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { + BNX2X_ERR("Can't map sge\n"); + return -ENOMEM; + } + get_page(pool->page); sw_buf->page = pool->page; sw_buf->offset = pool->offset; - mapping = pool->dma + sw_buf->offset; dma_unmap_addr_set(sw_buf, mapping, mapping); sge->addr_hi = cpu_to_le32(U64_HI(mapping)); @@ -648,9 +645,9 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, return err; } - dma_unmap_single(&bp->pdev->dev, - dma_unmap_addr(&old_rx_pg, mapping), - SGE_PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(&bp->pdev->dev, + dma_unmap_addr(&old_rx_pg, mapping), + SGE_PAGE_SIZE, DMA_FROM_DEVICE); /* Add one frag and update the appropriate fields in the skb */ if (fp->mode == TPA_MODE_LRO) skb_fill_page_desc(skb, j, old_rx_pg.page, @@ -3421,8 +3418,13 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb, u32 wnd_sum = 0; /* Headers length */ - hlen = (int)(skb_transport_header(skb) - skb->data) + - tcp_hdrlen(skb); + if (xmit_type & XMIT_GSO_ENC) + hlen = (int)(skb_inner_transport_header(skb) - + skb->data) + + inner_tcp_hdrlen(skb); + else + hlen = (int)(skb_transport_header(skb) - + skb->data) + tcp_hdrlen(skb); /* Amount of data (w/o headers) on linear part of SKB*/ first_bd_sz = skb_headlen(skb) - hlen; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 2b30081ec26d..03b7404d5b9b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -807,8 +807,8 @@ static inline void bnx2x_free_rx_sge(struct bnx2x *bp, /* Since many fragments can share the same page, make sure to * only unmap and free the page once. */ - dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping), - SGE_PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(sw_buf, mapping), + SGE_PAGE_SIZE, DMA_FROM_DEVICE); put_page(page); @@ -974,14 +974,6 @@ static inline void bnx2x_free_rx_mem_pool(struct bnx2x *bp, if (!pool->page) return; - /* Page was not fully fragmented. Unmap unused space */ - if (pool->offset < PAGE_SIZE) { - dma_addr_t dma = pool->dma + pool->offset; - int size = PAGE_SIZE - pool->offset; - - dma_unmap_single(&bp->pdev->dev, dma, size, DMA_FROM_DEVICE); - } - put_page(pool->page); pool->page = NULL; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 48ed005ba73f..76b9052a961c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -257,14 +257,15 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct bnx2x *bp = netdev_priv(dev); int cfg_idx = bnx2x_get_link_cfg_idx(bp); + u32 media_type; /* Dual Media boards present all available port types */ cmd->supported = bp->port.supported[cfg_idx] | (bp->port.supported[cfg_idx ^ 1] & (SUPPORTED_TP | SUPPORTED_FIBRE)); cmd->advertising = bp->port.advertising[cfg_idx]; - if (bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type == - ETH_PHY_SFP_1G_FIBER) { + media_type = bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type; + if (media_type == ETH_PHY_SFP_1G_FIBER) { cmd->supported &= ~(SUPPORTED_10000baseT_Full); cmd->advertising &= ~(ADVERTISED_10000baseT_Full); } @@ -312,12 +313,26 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->lp_advertising |= ADVERTISED_100baseT_Full; if (status & LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE) cmd->lp_advertising |= ADVERTISED_1000baseT_Half; - if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE) - cmd->lp_advertising |= ADVERTISED_1000baseT_Full; + if (status & LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE) { + if (media_type == ETH_PHY_KR) { + cmd->lp_advertising |= + ADVERTISED_1000baseKX_Full; + } else { + cmd->lp_advertising |= + ADVERTISED_1000baseT_Full; + } + } if (status & LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE) cmd->lp_advertising |= ADVERTISED_2500baseX_Full; - if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE) - cmd->lp_advertising |= ADVERTISED_10000baseT_Full; + if (status & LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE) { + if (media_type == ETH_PHY_KR) { + cmd->lp_advertising |= + ADVERTISED_10000baseKR_Full; + } else { + cmd->lp_advertising |= + ADVERTISED_10000baseT_Full; + } + } if (status & LINK_STATUS_LINK_PARTNER_20GXFD_CAPABLE) cmd->lp_advertising |= ADVERTISED_20000baseKR2_Full; } @@ -564,15 +579,20 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return -EINVAL; } - if (!(bp->port.supported[cfg_idx] & - SUPPORTED_1000baseT_Full)) { + if (bp->port.supported[cfg_idx] & + SUPPORTED_1000baseT_Full) { + advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_TP); + + } else if (bp->port.supported[cfg_idx] & + SUPPORTED_1000baseKX_Full) { + advertising = ADVERTISED_1000baseKX_Full; + } else { DP(BNX2X_MSG_ETHTOOL, "1G full not supported\n"); return -EINVAL; } - advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_TP); break; case SPEED_2500: @@ -600,17 +620,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return -EINVAL; } phy_idx = bnx2x_get_cur_phy_idx(bp); - if (!(bp->port.supported[cfg_idx] - & SUPPORTED_10000baseT_Full) || - (bp->link_params.phy[phy_idx].media_type == + if ((bp->port.supported[cfg_idx] & + SUPPORTED_10000baseT_Full) && + (bp->link_params.phy[phy_idx].media_type != ETH_PHY_SFP_1G_FIBER)) { + advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_FIBRE); + } else if (bp->port.supported[cfg_idx] & + SUPPORTED_10000baseKR_Full) { + advertising = (ADVERTISED_10000baseKR_Full | + ADVERTISED_FIBRE); + } else { DP(BNX2X_MSG_ETHTOOL, "10G full not supported\n"); return -EINVAL; } - advertising = (ADVERTISED_10000baseT_Full | - ADVERTISED_FIBRE); break; default: @@ -633,6 +658,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) bp->link_params.multi_phy_config = new_multi_phy_config; if (netif_running(dev)) { bnx2x_stats_handle(bp, STATS_EVENT_STOP); + bnx2x_force_link_reset(bp); bnx2x_link_set(bp); } @@ -1204,6 +1230,7 @@ static int bnx2x_acquire_nvram_lock(struct bnx2x *bp) if (!(val & (MCPR_NVM_SW_ARB_ARB_ARB1 << port))) { DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, "cannot get access to nvram interface\n"); + bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_NVRAM); return -EBUSY; } @@ -1944,6 +1971,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev, if (netif_running(dev)) { bnx2x_stats_handle(bp, STATS_EVENT_STOP); + bnx2x_force_link_reset(bp); bnx2x_link_set(bp); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 21a0d6afca4a..a0b03c27e0a3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -3392,9 +3392,9 @@ static void bnx2x_calc_ieee_aneg_adv(struct bnx2x_phy *phy, case BNX2X_FLOW_CTRL_AUTO: switch (params->req_fc_auto_adv) { case BNX2X_FLOW_CTRL_BOTH: + case BNX2X_FLOW_CTRL_RX: *ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH; break; - case BNX2X_FLOW_CTRL_RX: case BNX2X_FLOW_CTRL_TX: *ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC; @@ -3488,14 +3488,21 @@ static void bnx2x_ext_phy_set_pause(struct link_params *params, bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV_PAUSE, val); } -static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result) -{ /* LD LP */ +static void bnx2x_pause_resolve(struct bnx2x_phy *phy, + struct link_params *params, + struct link_vars *vars, + u32 pause_result) +{ + struct bnx2x *bp = params->bp; + /* LD LP */ switch (pause_result) { /* ASYM P ASYM P */ case 0xb: /* 1 0 1 1 */ + DP(NETIF_MSG_LINK, "Flow Control: TX only\n"); vars->flow_ctrl = BNX2X_FLOW_CTRL_TX; break; case 0xe: /* 1 1 1 0 */ + DP(NETIF_MSG_LINK, "Flow Control: RX only\n"); vars->flow_ctrl = BNX2X_FLOW_CTRL_RX; break; @@ -3503,10 +3510,22 @@ static void bnx2x_pause_resolve(struct link_vars *vars, u32 pause_result) case 0x7: /* 0 1 1 1 */ case 0xd: /* 1 1 0 1 */ case 0xf: /* 1 1 1 1 */ - vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH; + /* If the user selected to advertise RX ONLY, + * although we advertised both, need to enable + * RX only. + */ + if (params->req_fc_auto_adv == BNX2X_FLOW_CTRL_BOTH) { + DP(NETIF_MSG_LINK, "Flow Control: RX & TX\n"); + vars->flow_ctrl = BNX2X_FLOW_CTRL_BOTH; + } else { + DP(NETIF_MSG_LINK, "Flow Control: RX only\n"); + vars->flow_ctrl = BNX2X_FLOW_CTRL_RX; + } break; default: + DP(NETIF_MSG_LINK, "Flow Control: None\n"); + vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; break; } if (pause_result & (1<<0)) @@ -3567,7 +3586,7 @@ static void bnx2x_ext_phy_update_adv_fc(struct bnx2x_phy *phy, pause_result |= (lp_pause & MDIO_AN_REG_ADV_PAUSE_MASK) >> 10; DP(NETIF_MSG_LINK, "Ext PHY pause result 0x%x\n", pause_result); - bnx2x_pause_resolve(vars, pause_result); + bnx2x_pause_resolve(phy, params, vars, pause_result); } @@ -5396,7 +5415,7 @@ static void bnx2x_update_adv_fc(struct bnx2x_phy *phy, MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK)>>7; DP(NETIF_MSG_LINK, "pause_result CL37 0x%x\n", pause_result); } - bnx2x_pause_resolve(vars, pause_result); + bnx2x_pause_resolve(phy, params, vars, pause_result); } @@ -7129,7 +7148,7 @@ static void bnx2x_8073_resolve_fc(struct bnx2x_phy *phy, pause_result |= (lp_pause & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) >> 7; - bnx2x_pause_resolve(vars, pause_result); + bnx2x_pause_resolve(phy, params, vars, pause_result); DP(NETIF_MSG_LINK, "Ext PHY CL37 pause result 0x%x\n", pause_result); } @@ -11474,7 +11493,9 @@ static const struct bnx2x_phy phy_warpcore = { SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full | + SUPPORTED_1000baseKX_Full | SUPPORTED_10000baseT_Full | + SUPPORTED_10000baseKR_Full | SUPPORTED_20000baseKR2_Full | SUPPORTED_20000baseMLD2_Full | SUPPORTED_FIBRE | @@ -11980,8 +12001,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port, break; case PORT_HW_CFG_NET_SERDES_IF_KR: phy->media_type = ETH_PHY_KR; - phy->supported &= (SUPPORTED_1000baseT_Full | - SUPPORTED_10000baseT_Full | + phy->supported &= (SUPPORTED_1000baseKX_Full | + SUPPORTED_10000baseKR_Full | SUPPORTED_FIBRE | SUPPORTED_Autoneg | SUPPORTED_Pause | @@ -11999,8 +12020,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port, phy->media_type = ETH_PHY_KR; phy->flags |= FLAGS_WC_DUAL_MODE; phy->supported &= (SUPPORTED_20000baseKR2_Full | - SUPPORTED_10000baseT_Full | - SUPPORTED_1000baseT_Full | + SUPPORTED_10000baseKR_Full | + SUPPORTED_1000baseKX_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE | SUPPORTED_Pause | diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 33501bcddc48..c27af12314ed 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -2287,13 +2287,11 @@ static int bnx2x_set_spio(struct bnx2x *bp, int spio, u32 mode) void bnx2x_calc_fc_adv(struct bnx2x *bp) { u8 cfg_idx = bnx2x_get_link_cfg_idx(bp); + + bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | + ADVERTISED_Pause); switch (bp->link_vars.ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) { - case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE: - bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | - ADVERTISED_Pause); - break; - case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH: bp->port.advertising[cfg_idx] |= (ADVERTISED_Asym_Pause | ADVERTISED_Pause); @@ -2304,8 +2302,6 @@ void bnx2x_calc_fc_adv(struct bnx2x *bp) break; default: - bp->port.advertising[cfg_idx] &= ~(ADVERTISED_Asym_Pause | - ADVERTISED_Pause); break; } } @@ -2351,12 +2347,16 @@ int bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode) if (load_mode == LOAD_DIAG) { struct link_params *lp = &bp->link_params; lp->loopback_mode = LOOPBACK_XGXS; - /* do PHY loopback at 10G speed, if possible */ - if (lp->req_line_speed[cfx_idx] < SPEED_10000) { + /* Prefer doing PHY loopback at highest speed */ + if (lp->req_line_speed[cfx_idx] < SPEED_20000) { if (lp->speed_cap_mask[cfx_idx] & - PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) + PORT_HW_CFG_SPEED_CAPABILITY_D0_20G) lp->req_line_speed[cfx_idx] = - SPEED_10000; + SPEED_20000; + else if (lp->speed_cap_mask[cfx_idx] & + PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) + lp->req_line_speed[cfx_idx] = + SPEED_10000; else lp->req_line_speed[cfx_idx] = SPEED_1000; @@ -4867,9 +4867,7 @@ static bool bnx2x_check_blocks_with_parity3(struct bnx2x *bp, u32 sig, res = true; break; case AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY: - if (print) - _print_next_block((*par_num)++, - "MCP SCPAD"); + (*par_num)++; /* clear latched SCPAD PATIRY from MCP */ REG_WR(bp, MISC_REG_AEU_CLR_LATCH_SIGNAL, 1UL << 10); @@ -4931,6 +4929,7 @@ static bool bnx2x_parity_attn(struct bnx2x *bp, bool *global, bool print, (sig[3] & HW_PRTY_ASSERT_SET_3) || (sig[4] & HW_PRTY_ASSERT_SET_4)) { int par_num = 0; + DP(NETIF_MSG_HW, "Was parity error: HW block parity attention:\n" "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x\n", sig[0] & HW_PRTY_ASSERT_SET_0, @@ -4938,9 +4937,18 @@ static bool bnx2x_parity_attn(struct bnx2x *bp, bool *global, bool print, sig[2] & HW_PRTY_ASSERT_SET_2, sig[3] & HW_PRTY_ASSERT_SET_3, sig[4] & HW_PRTY_ASSERT_SET_4); - if (print) - netdev_err(bp->dev, - "Parity errors detected in blocks: "); + if (print) { + if (((sig[0] & HW_PRTY_ASSERT_SET_0) || + (sig[1] & HW_PRTY_ASSERT_SET_1) || + (sig[2] & HW_PRTY_ASSERT_SET_2) || + (sig[4] & HW_PRTY_ASSERT_SET_4)) || + (sig[3] & HW_PRTY_ASSERT_SET_3_WITHOUT_SCPAD)) { + netdev_err(bp->dev, + "Parity errors detected in blocks: "); + } else { + print = false; + } + } res |= bnx2x_check_blocks_with_parity0(bp, sig[0] & HW_PRTY_ASSERT_SET_0, &par_num, print); res |= bnx2x_check_blocks_with_parity1(bp, @@ -8431,7 +8439,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set) BNX2X_ETH_MAC, &ramrod_flags); } else { /* vf */ return bnx2x_vfpf_config_mac(bp, bp->dev->dev_addr, - bp->fp->index, true); + bp->fp->index, set); } } @@ -9323,7 +9331,8 @@ unload_error: * function stop ramrod is sent, since as part of this ramrod FW access * PTP registers. */ - bnx2x_stop_ptp(bp); + if (bp->flags & PTP_SUPPORTED) + bnx2x_stop_ptp(bp); /* Disable HW interrupts, NAPI */ bnx2x_netif_stop(bp, 1); @@ -11147,6 +11156,12 @@ static void bnx2x_link_settings_requested(struct bnx2x *bp) bp->port.advertising[idx] |= (ADVERTISED_1000baseT_Full | ADVERTISED_TP); + } else if (bp->port.supported[idx] & + SUPPORTED_1000baseKX_Full) { + bp->link_params.req_line_speed[idx] = + SPEED_1000; + bp->port.advertising[idx] |= + ADVERTISED_1000baseKX_Full; } else { BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x speed_cap_mask 0x%x\n", link_config, @@ -11179,6 +11194,13 @@ static void bnx2x_link_settings_requested(struct bnx2x *bp) bp->port.advertising[idx] |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE); + } else if (bp->port.supported[idx] & + SUPPORTED_10000baseKR_Full) { + bp->link_params.req_line_speed[idx] = + SPEED_10000; + bp->port.advertising[idx] |= + (ADVERTISED_10000baseKR_Full | + ADVERTISED_FIBRE); } else { BNX2X_ERR("NVRAM config error. Invalid link_config 0x%x speed_cap_mask 0x%x\n", link_config, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 07cdf9bbffef..4ad415ac8cfe 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -424,7 +424,7 @@ static void __bnx2x_vlan_mac_h_exec_pending(struct bnx2x *bp, o->head_exe_request = false; o->saved_ramrod_flags = 0; rc = bnx2x_exe_queue_step(bp, &o->exe_queue, &ramrod_flags); - if (rc != 0) { + if ((rc != 0) && (rc != 1)) { BNX2X_ERR("execution of pending commands failed with rc %d\n", rc); #ifdef BNX2X_STOP_ON_ERROR diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 6f2887a5e0be..6159deab8c98 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -594,6 +594,7 @@ struct bcmgenet_priv { wait_queue_head_t wq; struct phy_device *phydev; struct device_node *phy_dn; + struct device_node *mdio_dn; struct mii_bus *mii_bus; u16 gphy_rev; struct clk *clk_eee; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 6bef04e2f735..adf23d2ac488 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -408,6 +408,52 @@ static int bcmgenet_mii_probe(struct net_device *dev) return 0; } +/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with + * their internal MDIO management controller making them fail to successfully + * be read from or written to for the first transaction. We insert a dummy + * BMSR read here to make sure that phy_get_device() and get_phy_id() can + * correctly read the PHY MII_PHYSID1/2 registers and successfully register a + * PHY device for this peripheral. + * + * Once the PHY driver is registered, we can workaround subsequent reads from + * there (e.g: during system-wide power management). + * + * bus->reset is invoked before mdiobus_scan during mdiobus_register and is + * therefore the right location to stick that workaround. Since we do not want + * to read from non-existing PHYs, we either use bus->phy_mask or do a manual + * Device Tree scan to limit the search area. + */ +static int bcmgenet_mii_bus_reset(struct mii_bus *bus) +{ + struct net_device *dev = bus->priv; + struct bcmgenet_priv *priv = netdev_priv(dev); + struct device_node *np = priv->mdio_dn; + struct device_node *child = NULL; + u32 read_mask = 0; + int addr = 0; + + if (!np) { + read_mask = 1 << priv->phy_addr; + } else { + for_each_available_child_of_node(np, child) { + addr = of_mdio_parse_addr(&dev->dev, child); + if (addr < 0) + continue; + + read_mask |= 1 << addr; + } + } + + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + if (read_mask & 1 << addr) { + dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr); + mdiobus_read(bus, addr, MII_BMSR); + } + } + + return 0; +} + static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv) { struct mii_bus *bus; @@ -427,6 +473,7 @@ static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv) bus->parent = &priv->pdev->dev; bus->read = bcmgenet_mii_read; bus->write = bcmgenet_mii_write; + bus->reset = bcmgenet_mii_bus_reset; snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", priv->pdev->name, priv->pdev->id); @@ -443,7 +490,6 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv) { struct device_node *dn = priv->pdev->dev.of_node; struct device *kdev = &priv->pdev->dev; - struct device_node *mdio_dn; char *compat; int ret; @@ -451,14 +497,14 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv) if (!compat) return -ENOMEM; - mdio_dn = of_find_compatible_node(dn, NULL, compat); + priv->mdio_dn = of_find_compatible_node(dn, NULL, compat); kfree(compat); - if (!mdio_dn) { + if (!priv->mdio_dn) { dev_err(kdev, "unable to find MDIO bus node\n"); return -ENODEV; } - ret = of_mdiobus_register(priv->mii_bus, mdio_dn); + ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn); if (ret) { dev_err(kdev, "failed to register MDIO bus\n"); return ret; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 160f8077692c..29f330831784 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -434,8 +434,9 @@ static int lio_set_phys_id(struct net_device *netdev, if (ret) return ret; - octnet_mdio45_access(lio, 1, LIO68XX_LED_BEACON_ADDR, - &lio->phy_beacon_val); + ret = octnet_mdio45_access(lio, 1, + LIO68XX_LED_BEACON_ADDR, + &lio->phy_beacon_val); if (ret) return ret; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 0d3106b464b2..f67641a2ff9e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -650,14 +650,12 @@ void octeon_free_device_mem(struct octeon_device *oct) for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES; i++) { /* could check mask as well */ - if (oct->droq[i]) - vfree(oct->droq[i]); + vfree(oct->droq[i]); } for (i = 0; i < MAX_OCTEON_INSTR_QUEUES; i++) { /* could check mask as well */ - if (oct->instr_queue[i]) - vfree(oct->instr_queue[i]); + vfree(oct->instr_queue[i]); } i = oct->octeon_id; @@ -1078,10 +1076,7 @@ octeon_unregister_dispatch_fn(struct octeon_device *oct, u16 opcode, oct->dispatch.count--; spin_unlock_bh(&oct->dispatch.lock); - - if (dfree) - vfree(dfree); - + vfree(dfree); return retval; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 94b502a0cf33..4dba86eaa045 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -216,9 +216,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no) dev_dbg(&oct->pci_dev->dev, "%s[%d]\n", __func__, q_no); octeon_droq_destroy_ring_buffers(oct, droq); - - if (droq->recv_buf_list) - vfree(droq->recv_buf_list); + vfree(droq->recv_buf_list); if (droq->info_base_addr) cnnic_free_aligned_dma(oct->pci_dev, droq->info_list, diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 356796bf9b87..a2a24652c8f3 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -175,8 +175,7 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no) desc_size = CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf)); - if (iq->request_list) - vfree(iq->request_list); + vfree(iq->request_list); if (iq->base_addr) { q_size = iq->max_count * desc_size; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index eadae1b412c6..da2004e2a741 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1208,7 +1208,7 @@ static int enic_poll(struct napi_struct *napi, int budget) napi_complete(napi); vnic_intr_unmask(&enic->intr[intr]); } - enic_poll_unlock_napi(&enic->rq[cq_rq]); + enic_poll_unlock_napi(&enic->rq[cq_rq], napi); return rq_work_done; } @@ -1414,7 +1414,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ enic_calc_int_moderation(enic, &enic->rq[rq]); - enic_poll_unlock_napi(&enic->rq[rq]); + enic_poll_unlock_napi(&enic->rq[rq], napi); if (work_done < work_to_do) { /* Some work done, but not enough to stay in polling, diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.h b/drivers/net/ethernet/cisco/enic/vnic_rq.h index 8111d5202df2..b9c82f143d7e 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.h @@ -21,6 +21,7 @@ #define _VNIC_RQ_H_ #include <linux/pci.h> +#include <linux/netdevice.h> #include "vnic_dev.h" #include "vnic_cq.h" @@ -75,6 +76,12 @@ struct vnic_rq_buf { uint64_t wr_id; }; +enum enic_poll_state { + ENIC_POLL_STATE_IDLE, + ENIC_POLL_STATE_NAPI, + ENIC_POLL_STATE_POLL +}; + struct vnic_rq { unsigned int index; struct vnic_dev *vdev; @@ -86,19 +93,7 @@ struct vnic_rq { void *os_buf_head; unsigned int pkts_outstanding; #ifdef CONFIG_NET_RX_BUSY_POLL -#define ENIC_POLL_STATE_IDLE 0 -#define ENIC_POLL_STATE_NAPI (1 << 0) /* NAPI owns this poll */ -#define ENIC_POLL_STATE_POLL (1 << 1) /* poll owns this poll */ -#define ENIC_POLL_STATE_NAPI_YIELD (1 << 2) /* NAPI yielded this poll */ -#define ENIC_POLL_STATE_POLL_YIELD (1 << 3) /* poll yielded this poll */ -#define ENIC_POLL_YIELD (ENIC_POLL_STATE_NAPI_YIELD | \ - ENIC_POLL_STATE_POLL_YIELD) -#define ENIC_POLL_LOCKED (ENIC_POLL_STATE_NAPI | \ - ENIC_POLL_STATE_POLL) -#define ENIC_POLL_USER_PEND (ENIC_POLL_STATE_POLL | \ - ENIC_POLL_STATE_POLL_YIELD) - unsigned int bpoll_state; - spinlock_t bpoll_lock; + atomic_t bpoll_state; #endif /* CONFIG_NET_RX_BUSY_POLL */ }; @@ -215,76 +210,43 @@ static inline int vnic_rq_fill(struct vnic_rq *rq, #ifdef CONFIG_NET_RX_BUSY_POLL static inline void enic_busy_poll_init_lock(struct vnic_rq *rq) { - spin_lock_init(&rq->bpoll_lock); - rq->bpoll_state = ENIC_POLL_STATE_IDLE; + atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE); } static inline bool enic_poll_lock_napi(struct vnic_rq *rq) { - bool rc = true; - - spin_lock(&rq->bpoll_lock); - if (rq->bpoll_state & ENIC_POLL_LOCKED) { - WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI); - rq->bpoll_state |= ENIC_POLL_STATE_NAPI_YIELD; - rc = false; - } else { - rq->bpoll_state = ENIC_POLL_STATE_NAPI; - } - spin_unlock(&rq->bpoll_lock); + int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE, + ENIC_POLL_STATE_NAPI); - return rc; + return (rc == ENIC_POLL_STATE_IDLE); } -static inline bool enic_poll_unlock_napi(struct vnic_rq *rq) +static inline void enic_poll_unlock_napi(struct vnic_rq *rq, + struct napi_struct *napi) { - bool rc = false; - - spin_lock(&rq->bpoll_lock); - WARN_ON(rq->bpoll_state & - (ENIC_POLL_STATE_POLL | ENIC_POLL_STATE_NAPI_YIELD)); - if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD) - rc = true; - rq->bpoll_state = ENIC_POLL_STATE_IDLE; - spin_unlock(&rq->bpoll_lock); - - return rc; + WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_NAPI); + napi_gro_flush(napi, false); + atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE); } static inline bool enic_poll_lock_poll(struct vnic_rq *rq) { - bool rc = true; - - spin_lock_bh(&rq->bpoll_lock); - if (rq->bpoll_state & ENIC_POLL_LOCKED) { - rq->bpoll_state |= ENIC_POLL_STATE_POLL_YIELD; - rc = false; - } else { - rq->bpoll_state |= ENIC_POLL_STATE_POLL; - } - spin_unlock_bh(&rq->bpoll_lock); + int rc = atomic_cmpxchg(&rq->bpoll_state, ENIC_POLL_STATE_IDLE, + ENIC_POLL_STATE_POLL); - return rc; + return (rc == ENIC_POLL_STATE_IDLE); } -static inline bool enic_poll_unlock_poll(struct vnic_rq *rq) -{ - bool rc = false; - spin_lock_bh(&rq->bpoll_lock); - WARN_ON(rq->bpoll_state & ENIC_POLL_STATE_NAPI); - if (rq->bpoll_state & ENIC_POLL_STATE_POLL_YIELD) - rc = true; - rq->bpoll_state = ENIC_POLL_STATE_IDLE; - spin_unlock_bh(&rq->bpoll_lock); - - return rc; +static inline void enic_poll_unlock_poll(struct vnic_rq *rq) +{ + WARN_ON(atomic_read(&rq->bpoll_state) != ENIC_POLL_STATE_POLL); + atomic_set(&rq->bpoll_state, ENIC_POLL_STATE_IDLE); } static inline bool enic_poll_busy_polling(struct vnic_rq *rq) { - WARN_ON(!(rq->bpoll_state & ENIC_POLL_LOCKED)); - return rq->bpoll_state & ENIC_POLL_USER_PEND; + return atomic_read(&rq->bpoll_state) & ENIC_POLL_STATE_POLL; } #else @@ -298,7 +260,8 @@ static inline bool enic_poll_lock_napi(struct vnic_rq *rq) return true; } -static inline bool enic_poll_unlock_napi(struct vnic_rq *rq) +static inline bool enic_poll_unlock_napi(struct vnic_rq *rq, + struct napi_struct *napi) { return false; } diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index b8de87b03046..ff76d4e9dc1b 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -83,12 +83,12 @@ config UGETH_TX_ON_DEMAND config GIANFAR tristate "Gianfar Ethernet" - depends on FSL_SOC select FSL_PQ_MDIO select PHYLIB select CRC32 ---help--- This driver supports the Gigabit TSEC on the MPC83xx, MPC85xx, - and MPC86xx family of chips, and the FEC on the 8540. + and MPC86xx family of chips, the eTSEC on LS1021A and the FEC + on the 8540. endif # NET_VENDOR_FREESCALE diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index a86af8a7485d..1eee73cccdf5 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -428,6 +428,8 @@ struct bufdesc_ex { #define FEC_QUIRK_BUG_CAPTURE (1 << 10) /* Controller has only one MDIO bus */ #define FEC_QUIRK_SINGLE_MDIO (1 << 11) +/* Controller supports RACC register */ +#define FEC_QUIRK_HAS_RACC (1 << 12) struct fec_enet_priv_tx_q { int index; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e464aeaeed2c..1f89c59b4353 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -85,28 +85,30 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx25-fec", - .driver_data = FEC_QUIRK_USE_GASKET, + .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC, }, { .name = "imx27-fec", - .driver_data = 0, + .driver_data = FEC_QUIRK_HAS_RACC, }, { .name = "imx28-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | - FEC_QUIRK_SINGLE_MDIO, + FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | - FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358, + FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 | + FEC_QUIRK_HAS_RACC, }, { .name = "mvf600-fec", - .driver_data = FEC_QUIRK_ENET_MAC, + .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC, }, { .name = "imx6sx-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | - FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE, + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | + FEC_QUIRK_HAS_RACC, }, { /* sentinel */ } @@ -970,13 +972,15 @@ fec_restart(struct net_device *ndev) writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); #if !defined(CONFIG_M5272) - /* set RX checksum */ - val = readl(fep->hwp + FEC_RACC); - if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) - val |= FEC_RACC_OPTIONS; - else - val &= ~FEC_RACC_OPTIONS; - writel(val, fep->hwp + FEC_RACC); + if (fep->quirks & FEC_QUIRK_HAS_RACC) { + /* set RX checksum */ + val = readl(fep->hwp + FEC_RACC); + if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) + val |= FEC_RACC_OPTIONS; + else + val &= ~FEC_RACC_OPTIONS; + writel(val, fep->hwp + FEC_RACC); + } #endif /* diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c index ff2903652f4b..c3b6af83f070 100644 --- a/drivers/net/ethernet/icplus/ipg.c +++ b/drivers/net/ethernet/icplus/ipg.c @@ -1028,7 +1028,7 @@ static struct net_device_stats *ipg_nic_get_stats(struct net_device *dev) /* detailed rx_errors */ sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) + - ipg_r16(IPG_FRAMETOOLONGERRRORS); + ipg_r16(IPG_FRAMETOOLONGERRORS); sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS); /* Unutilized IPG statistic registers. */ diff --git a/drivers/net/ethernet/icplus/ipg.h b/drivers/net/ethernet/icplus/ipg.h index a21e4f5702b5..de606281f97b 100644 --- a/drivers/net/ethernet/icplus/ipg.h +++ b/drivers/net/ethernet/icplus/ipg.h @@ -102,7 +102,7 @@ enum ipg_regs { #define IPG_MCSTFRAMESRCVDOK 0xB8 #define IPG_BCSTFRAMESRCVDOK 0xBE #define IPG_MACCONTROLFRAMESRCVD 0xC6 -#define IPG_FRAMETOOLONGERRRORS 0xC8 +#define IPG_FRAMETOOLONGERRORS 0xC8 #define IPG_INRANGELENGTHERRORS 0xCA #define IPG_FRAMECHECKSEQERRORS 0xCC #define IPG_FRAMESLOSTRXERRORS 0xCE diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index b074b9a667b3..91a5a0ae9cd7 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -237,17 +237,19 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) if (ret_val) return false; out: - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { - /* Unforce SMBus mode in PHY */ - e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); - phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; - e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); + if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) { + /* Only unforce SMBus if ME is not active */ + if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + /* Unforce SMBus mode in PHY */ + e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg); + phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; + e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg); - /* Unforce SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); + /* Unforce SMBus mode in MAC */ + mac_reg = er32(CTRL_EXT); + mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_reg); + } } return true; @@ -1087,6 +1089,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) u32 mac_reg; s32 ret_val = 0; u16 phy_reg; + u16 oem_reg = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_LM) || @@ -1128,33 +1131,37 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; + /* Force SMBus mode in PHY */ + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); + if (ret_val) + goto release; + phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); + + /* Force SMBus mode in MAC */ + mac_reg = er32(CTRL_EXT); + mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, mac_reg); + /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) { ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS, - &phy_reg); + &oem_reg); if (ret_val) goto release; + + phy_reg = oem_reg; phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS; + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, phy_reg); + if (ret_val) goto release; } - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) - goto release; - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); - /* Set Inband ULP Exit, Reset to SMBus mode and * Disable SMBus Release on PERST# in PHY */ @@ -1166,10 +1173,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (to_sx) { if (er32(WUFC) & E1000_WUFC_LNKC) phy_reg |= I218_ULP_CONFIG1_WOL_HOST; + else + phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST; phy_reg |= I218_ULP_CONFIG1_STICKY_ULP; + phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT; } else { phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT; + phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP; + phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST; } e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); @@ -1181,6 +1193,15 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) /* Commit ULP changes in PHY by starting auto ULP configuration */ phy_reg |= I218_ULP_CONFIG1_START; e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); + + if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) && + to_sx && (er32(STATUS) & E1000_STATUS_LU)) { + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, + oem_reg); + if (ret_val) + goto release; + } + release: hw->phy.ops.release(hw); out: @@ -1379,16 +1400,20 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (((hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) && link) { - u32 reg; + u16 speed, duplex; - reg = er32(STATUS); + e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex); tipg_reg = er32(TIPG); tipg_reg &= ~E1000_TIPG_IPGT_MASK; - if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) { + if (duplex == HALF_DUPLEX && speed == SPEED_10) { tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ emi_val = 0; + } else if (hw->mac.type == e1000_pch_spt && + duplex == FULL_DUPLEX && speed != SPEED_1000) { + tipg_reg |= 0xC; + emi_val = 1; } else { /* Roll back the default values */ @@ -1412,14 +1437,59 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (ret_val) return ret_val; + + if (hw->mac.type == e1000_pch_spt) { + u16 data; + u16 ptr_gap; + + if (speed == SPEED_1000) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1e_rphy_locked(hw, + PHY_REG(776, 20), + &data); + if (ret_val) { + hw->phy.ops.release(hw); + return ret_val; + } + + ptr_gap = (data & (0x3FF << 2)) >> 2; + if (ptr_gap < 0x18) { + data &= ~(0x3FF << 2); + data |= (0x18 << 2); + ret_val = + e1e_wphy_locked(hw, + PHY_REG(776, 20), + data); + } + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + } + } + } + + /* I217 Packet Loss issue: + * ensure that FEXTNVM4 Beacon Duration is set correctly + * on power up. + * Set the Beacon Duration for I217 to 8 usec + */ + if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) { + u32 mac_reg; + + mac_reg = er32(FEXTNVM4); + mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; + mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; + ew32(FEXTNVM4, mac_reg); } /* Work-around I218 hang issue */ if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V) || (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM3) || - (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3) || - (hw->mac.type == e1000_pch_spt)) { + (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) { ret_val = e1000_k1_workaround_lpt_lp(hw, link); if (ret_val) return ret_val; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e62b9dcb91fe..89d788d8f263 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6354,13 +6354,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) } /** - * e1000e_disable_aspm - Disable ASPM states + * __e1000e_disable_aspm - Disable ASPM states * @pdev: pointer to PCI device struct * @state: bit-mask of ASPM states to disable + * @locked: indication if this context holds pci_bus_sem locked. * * Some devices *must* have certain ASPM states disabled per hardware errata. **/ -static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) +static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state, int locked) { struct pci_dev *parent = pdev->bus->self; u16 aspm_dis_mask = 0; @@ -6399,7 +6400,10 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) "L1" : ""); #ifdef CONFIG_PCIEASPM - pci_disable_link_state_locked(pdev, state); + if (locked) + pci_disable_link_state_locked(pdev, state); + else + pci_disable_link_state(pdev, state); /* Double-check ASPM control. If not disabled by the above, the * BIOS is preventing that from happening (or CONFIG_PCIEASPM is @@ -6422,6 +6426,32 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) aspm_dis_mask); } +/** + * e1000e_disable_aspm - Disable ASPM states. + * @pdev: pointer to PCI device struct + * @state: bit-mask of ASPM states to disable + * + * This function acquires the pci_bus_sem! + * Some devices *must* have certain ASPM states disabled per hardware errata. + **/ +static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) +{ + __e1000e_disable_aspm(pdev, state, 0); +} + +/** + * e1000e_disable_aspm_locked Disable ASPM states. + * @pdev: pointer to PCI device struct + * @state: bit-mask of ASPM states to disable + * + * This function must be called with pci_bus_sem acquired! + * Some devices *must* have certain ASPM states disabled per hardware errata. + **/ +static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state) +{ + __e1000e_disable_aspm(pdev, state, 1); +} + #ifdef CONFIG_PM static int __e1000_resume(struct pci_dev *pdev) { @@ -6435,7 +6465,7 @@ static int __e1000_resume(struct pci_dev *pdev) if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1) aspm_disable_flag |= PCIE_LINK_STATE_L1; if (aspm_disable_flag) - e1000e_disable_aspm(pdev, aspm_disable_flag); + e1000e_disable_aspm_locked(pdev, aspm_disable_flag); pci_set_master(pdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index f54996f19629..395f32f226c0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -484,6 +484,8 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!dev) return -ENOMEM; + /* warn if we are about to overwrite the pointer */ + WARN_ON(tx_ring->tx_bi); bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL); if (!tx_ring->tx_bi) @@ -644,6 +646,8 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring) struct device *dev = rx_ring->dev; int bi_size; + /* warn if we are about to overwrite the pointer */ + WARN_ON(rx_ring->rx_bi); bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); if (!rx_ring->rx_bi) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 1b98c25b3092..fea3b75a9a35 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -264,7 +264,6 @@ extern const char i40evf_driver_version[]; int i40evf_up(struct i40evf_adapter *adapter); void i40evf_down(struct i40evf_adapter *adapter); -void i40evf_reinit_locked(struct i40evf_adapter *adapter); void i40evf_reset(struct i40evf_adapter *adapter); void i40evf_set_ethtool_ops(struct net_device *netdev); void i40evf_update_stats(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index f4e77665bc54..2b53c870e7f1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -267,8 +267,10 @@ static int i40evf_set_ringparam(struct net_device *netdev, adapter->tx_desc_count = new_tx_count; adapter->rx_desc_count = new_rx_count; - if (netif_running(netdev)) - i40evf_reinit_locked(adapter); + if (netif_running(netdev)) { + adapter->flags |= I40EVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + } return 0; } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 7c53aca4b5a6..4ab4ebba07a1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -170,7 +170,8 @@ static void i40evf_tx_timeout(struct net_device *netdev) struct i40evf_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; - if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) { + if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING | + I40EVF_FLAG_RESET_NEEDED))) { adapter->flags |= I40EVF_FLAG_RESET_NEEDED; schedule_work(&adapter->reset_task); } @@ -1460,7 +1461,7 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { lut = 0; for (j = 0; j < 4; j++) { - if (cqueue == adapter->vsi_res->num_queue_pairs) + if (cqueue == adapter->num_active_queues) cqueue = 0; lut |= ((cqueue) << (8 * j)); cqueue++; @@ -1470,8 +1471,8 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) i40e_flush(hw); } -#define I40EVF_RESET_WAIT_MS 100 -#define I40EVF_RESET_WAIT_COUNT 200 +#define I40EVF_RESET_WAIT_MS 10 +#define I40EVF_RESET_WAIT_COUNT 500 /** * i40evf_reset_task - Call-back task to handle hardware reset * @work: pointer to work_struct @@ -1495,10 +1496,17 @@ static void i40evf_reset_task(struct work_struct *work) &adapter->crit_section)) usleep_range(500, 1000); + i40evf_misc_irq_disable(adapter); if (adapter->flags & I40EVF_FLAG_RESET_NEEDED) { - dev_info(&adapter->pdev->dev, "Requesting reset from PF\n"); + adapter->flags &= ~I40EVF_FLAG_RESET_NEEDED; + /* Restart the AQ here. If we have been reset but didn't + * detect it, or if the PF had to reinit, our AQ will be hosed. + */ + i40evf_shutdown_adminq(hw); + i40evf_init_adminq(hw); i40evf_request_reset(adapter); } + adapter->flags |= I40EVF_FLAG_RESET_PENDING; /* poll until we see the reset actually happen */ for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) { @@ -1507,10 +1515,10 @@ static void i40evf_reset_task(struct work_struct *work) if ((rstat_val != I40E_VFR_VFACTIVE) && (rstat_val != I40E_VFR_COMPLETED)) break; - msleep(I40EVF_RESET_WAIT_MS); + usleep_range(500, 1000); } if (i == I40EVF_RESET_WAIT_COUNT) { - adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; + dev_info(&adapter->pdev->dev, "Never saw reset\n"); goto continue_reset; /* act like the reset happened */ } @@ -1518,11 +1526,12 @@ static void i40evf_reset_task(struct work_struct *work) for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) { rstat_val = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; - if ((rstat_val == I40E_VFR_VFACTIVE) || - (rstat_val == I40E_VFR_COMPLETED)) + if (rstat_val == I40E_VFR_VFACTIVE) break; msleep(I40EVF_RESET_WAIT_MS); } + /* extra wait to make sure minimum wait is met */ + msleep(I40EVF_RESET_WAIT_MS); if (i == I40EVF_RESET_WAIT_COUNT) { struct i40evf_mac_filter *f, *ftmp; struct i40evf_vlan_filter *fv, *fvtmp; @@ -1534,11 +1543,10 @@ static void i40evf_reset_task(struct work_struct *work) if (netif_running(adapter->netdev)) { set_bit(__I40E_DOWN, &adapter->vsi.state); - i40evf_irq_disable(adapter); - i40evf_napi_disable_all(adapter); - netif_tx_disable(netdev); - netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); + netif_tx_disable(netdev); + i40evf_napi_disable_all(adapter); + i40evf_irq_disable(adapter); i40evf_free_traffic_irqs(adapter); i40evf_free_all_tx_resources(adapter); i40evf_free_all_rx_resources(adapter); @@ -1550,6 +1558,7 @@ static void i40evf_reset_task(struct work_struct *work) list_del(&f->list); kfree(f); } + list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) { list_del(&fv->list); @@ -1564,22 +1573,27 @@ static void i40evf_reset_task(struct work_struct *work) i40evf_shutdown_adminq(hw); adapter->netdev->flags &= ~IFF_UP; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); + adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; + dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); return; /* Do not attempt to reinit. It's dead, Jim. */ } continue_reset: - adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; - - i40evf_irq_disable(adapter); - if (netif_running(adapter->netdev)) { - i40evf_napi_disable_all(adapter); - netif_tx_disable(netdev); - netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + i40evf_napi_disable_all(adapter); } + i40evf_irq_disable(adapter); adapter->state = __I40EVF_RESETTING; + adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; + + /* free the Tx/Rx rings and descriptors, might be better to just + * re-use them sometime in the future + */ + i40evf_free_all_rx_resources(adapter); + i40evf_free_all_tx_resources(adapter); /* kill and reinit the admin queue */ if (i40evf_shutdown_adminq(hw)) @@ -1603,6 +1617,7 @@ continue_reset: adapter->aq_required = I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); + i40evf_misc_irq_enable(adapter); mod_timer(&adapter->watchdog_timer, jiffies + 2); @@ -1624,7 +1639,10 @@ continue_reset: goto reset_err; i40evf_irq_enable(adapter, true); + } else { + adapter->state = __I40EVF_DOWN; } + return; reset_err: dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); @@ -1667,6 +1685,11 @@ static void i40evf_adminq_task(struct work_struct *work) memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE); } while (pending); + if ((adapter->flags & + (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED)) || + adapter->state == __I40EVF_RESETTING) + goto freedom; + /* check for error indications */ val = rd32(hw, hw->aq.arq.len); oldval = val; @@ -1702,6 +1725,7 @@ static void i40evf_adminq_task(struct work_struct *work) if (oldval != val) wr32(hw, hw->aq.asq.len, val); +freedom: kfree(event.msg_buf); out: /* re-enable Admin queue interrupt cause */ @@ -1897,47 +1921,6 @@ static struct net_device_stats *i40evf_get_stats(struct net_device *netdev) } /** - * i40evf_reinit_locked - Software reinit - * @adapter: board private structure - * - * Reinititalizes the ring structures in response to a software configuration - * change. Roughly the same as close followed by open, but skips releasing - * and reallocating the interrupts. - **/ -void i40evf_reinit_locked(struct i40evf_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int err; - - WARN_ON(in_interrupt()); - - i40evf_down(adapter); - - /* allocate transmit descriptors */ - err = i40evf_setup_all_tx_resources(adapter); - if (err) - goto err_reinit; - - /* allocate receive descriptors */ - err = i40evf_setup_all_rx_resources(adapter); - if (err) - goto err_reinit; - - i40evf_configure(adapter); - - err = i40evf_up_complete(adapter); - if (err) - goto err_reinit; - - i40evf_irq_enable(adapter, true); - return; - -err_reinit: - dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); - i40evf_close(netdev); -} - -/** * i40evf_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure * @new_mtu: new value for maximum frame size @@ -1952,9 +1935,10 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu) if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER)) return -EINVAL; - /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; - i40evf_reinit_locked(adapter); + adapter->flags |= I40EVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + return 0; } diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 0f69ef81751a..b0182dd31346 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1,5 +1,5 @@ /* Intel(R) Gigabit Ethernet Linux driver - * Copyright(c) 2007-2014 Intel Corporation. + * Copyright(c) 2007-2015 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1900,8 +1900,8 @@ static void igb_clear_hw_cntrs_82575(struct e1000_hw *hw) * igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable * @hw: pointer to the HW structure * - * After rx enable if managability is enabled then there is likely some - * bad data at the start of the fifo and possibly in the DMA fifo. This + * After rx enable if manageability is enabled then there is likely some + * bad data at the start of the fifo and possibly in the DMA fifo. This * function clears the fifos and flushes any packets that came in as rx was * being enabled. **/ @@ -1910,6 +1910,11 @@ void igb_rx_fifo_flush_82575(struct e1000_hw *hw) u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; int i, ms_wait; + /* disable IPv6 options as per hardware errata */ + rfctl = rd32(E1000_RFCTL); + rfctl |= E1000_RFCTL_IPV6_EX_DIS; + wr32(E1000_RFCTL, rfctl); + if (hw->mac.type != e1000_82575 || !(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN)) return; @@ -1937,7 +1942,6 @@ void igb_rx_fifo_flush_82575(struct e1000_hw *hw) * incoming packets are rejected. Set enable and wait 2ms so that * any packet that was coming in as RCTL.EN was set is flushed */ - rfctl = rd32(E1000_RFCTL); wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); rlpml = rd32(E1000_RLPML); diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 217f8138851b..f8684aa285be 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -344,7 +344,8 @@ #define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ /* Header split receive */ -#define E1000_RFCTL_LEF 0x00040000 +#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 +#define E1000_RFCTL_LEF 0x00040000 /* Collision related configuration parameters */ #define E1000_COLLISION_THRESHOLD 15 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index f287186192bb..2f70a9b152bd 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -58,7 +58,7 @@ #define MAJ 5 #define MIN 2 -#define BUILD 15 +#define BUILD 18 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" char igb_driver_name[] = "igb"; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5bdf78231a4e..370e20ed224c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -310,6 +310,7 @@ struct mvneta_port { unsigned int link; unsigned int duplex; unsigned int speed; + unsigned int tx_csum_limit; int use_inband_status:1; }; @@ -2508,8 +2509,10 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) dev->mtu = mtu; - if (!netif_running(dev)) + if (!netif_running(dev)) { + netdev_update_features(dev); return 0; + } /* The interface is running, so we have to force a * reallocation of the queues @@ -2538,9 +2541,26 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu) mvneta_start_dev(pp); mvneta_port_up(pp); + netdev_update_features(dev); + return 0; } +static netdev_features_t mvneta_fix_features(struct net_device *dev, + netdev_features_t features) +{ + struct mvneta_port *pp = netdev_priv(dev); + + if (pp->tx_csum_limit && dev->mtu > pp->tx_csum_limit) { + features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + netdev_info(dev, + "Disable IP checksum for MTU greater than %dB\n", + pp->tx_csum_limit); + } + + return features; +} + /* Get mac address */ static void mvneta_get_mac_addr(struct mvneta_port *pp, unsigned char *addr) { @@ -2862,6 +2882,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_set_rx_mode = mvneta_set_rx_mode, .ndo_set_mac_address = mvneta_set_mac_addr, .ndo_change_mtu = mvneta_change_mtu, + .ndo_fix_features = mvneta_fix_features, .ndo_get_stats64 = mvneta_get_stats64, .ndo_do_ioctl = mvneta_ioctl, }; @@ -3107,6 +3128,9 @@ static int mvneta_probe(struct platform_device *pdev) } } + if (of_device_is_compatible(dn, "marvell,armada-370-neta")) + pp->tx_csum_limit = 1600; + pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; @@ -3185,6 +3209,7 @@ static int mvneta_remove(struct platform_device *pdev) static const struct of_device_id mvneta_match[] = { { .compatible = "marvell,armada-370-neta" }, + { .compatible = "marvell,armada-xp-neta" }, { } }; MODULE_DEVICE_TABLE(of, mvneta_match); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 77179d7ae4cc..e0de2fd1ce12 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1977,10 +1977,6 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } - if (priv->base_tx_qpn) { - mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num); - priv->base_tx_qpn = 0; - } } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 35f726c17e48..7a4f20bb7fcb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -718,7 +718,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, - int hwtstamp_rx_filter) + netdev_features_t dev_features) { __wsum hw_checksum = 0; @@ -726,14 +726,8 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, hw_checksum = csum_unfold((__force __sum16)cqe->checksum); - if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && - hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { - /* next protocol non IPv4 or IPv6 */ - if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IP) && - ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto - != htons(ETH_P_IPV6)) - return -1; + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) && + !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); hdr += sizeof(struct vlan_hdr); } @@ -896,7 +890,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (ip_summed == CHECKSUM_COMPLETE) { void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); - if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, gro_skb, va, + dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_none++; ring->csum_complete--; @@ -951,7 +946,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + if (check_csum(cqe, skb, skb->data, dev->features)) { ip_summed = CHECKSUM_NONE; ring->csum_complete--; ring->csum_none++; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7bed3a88579f..c10d98f6ad96 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -66,6 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->stride = stride; + ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node); @@ -180,6 +181,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); + mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); @@ -231,6 +233,11 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } +static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) +{ + return ring->prod - ring->cons > ring->full_size; +} + static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) @@ -473,11 +480,10 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, netdev_tx_completed_queue(ring->tx_queue, packets, bytes); - /* - * Wakeup Tx queue if this stopped, and at least 1 packet - * was completed + /* Wakeup Tx queue if this stopped, and ring is not full. */ - if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) { + if (netif_tx_queue_stopped(ring->tx_queue) && + !mlx4_en_is_tx_ring_full(ring)) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } @@ -921,8 +927,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); /* Check available TXBBs And 2K spare for prefetch */ - stop_queue = (int)(ring->prod - ring_cons) > - ring->size - HEADROOM - MAX_DESC_TXBBS; + stop_queue = mlx4_en_is_tx_ring_full(ring); if (unlikely(stop_queue)) { netif_tx_stop_queue(ring->tx_queue); ring->queue_stopped++; @@ -991,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) smp_rmb(); ring_cons = ACCESS_ONCE(ring->cons); - if (unlikely(((int)(ring->prod - ring_cons)) <= - ring->size - HEADROOM - MAX_DESC_TXBBS)) { + if (unlikely(!mlx4_en_is_tx_ring_full(ring))) { netif_tx_wake_queue(ring->tx_queue); ring->wake_queue++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 6fce58718837..0d80aed59043 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -93,8 +93,14 @@ int mlx4_register_interface(struct mlx4_interface *intf) mutex_lock(&intf_mutex); list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) + list_for_each_entry(priv, &dev_list, dev_list) { + if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) { + mlx4_dbg(&priv->dev, + "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol); + intf->flags &= ~MLX4_INTFF_BONDING; + } mlx4_add_device(intf, priv); + } mutex_unlock(&intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d5f9adb6a784..666d1669eb52 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -279,6 +279,7 @@ struct mlx4_en_tx_ring { u32 size; /* number of TXBBs */ u32 size_mask; u16 stride; + u32 full_size; u16 cqn; /* index of port CQ associated with this ring */ u32 buf_size; __be32 doorbell_qpn; @@ -580,7 +581,6 @@ struct mlx4_en_priv { int vids[128]; bool wol; struct device *ddev; - int base_tx_qpn; struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; struct hwtstamp_config hwtstamp_config; u32 counter_index; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 2bae50292dcd..83651ac8ddb9 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -279,7 +279,7 @@ MODULE_FIRMWARE("myri10ge_eth_z8e.dat"); MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); -/* Careful: must be accessed under kparam_block_sysfs_write */ +/* Careful: must be accessed under kernel_param_lock() */ static char *myri10ge_fw_name = NULL; module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); @@ -3427,7 +3427,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) } } - kparam_block_sysfs_write(myri10ge_fw_name); + kernel_param_lock(THIS_MODULE); if (myri10ge_fw_name != NULL) { char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); if (fw_name) { @@ -3435,7 +3435,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) set_fw_name(mgp, fw_name, true); } } - kparam_unblock_sysfs_write(myri10ge_fw_name); + kernel_param_unlock(THIS_MODULE); if (mgp->board_number < MYRI10GE_MAX_BOARDS && myri10ge_fw_names[mgp->board_number] != NULL && diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index 42656da50500..7a8ce920c49e 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -116,8 +116,10 @@ static int ravb_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) priv->ptp.current_addend = addend; gccr = ravb_read(ndev, GCCR); - if (gccr & GCCR_LTI) + if (gccr & GCCR_LTI) { + spin_unlock_irqrestore(&priv->lock, flags); return -EBUSY; + } ravb_write(ndev, addend & GTI_TIV, GTI); ravb_write(ndev, gccr | GCCR_LTI, GCCR); diff --git a/drivers/net/ethernet/sis/sis900.h b/drivers/net/ethernet/sis/sis900.h index 1341f33e6084..7d430d322931 100644 --- a/drivers/net/ethernet/sis/sis900.h +++ b/drivers/net/ethernet/sis/sis900.h @@ -56,7 +56,7 @@ enum sis900_configuration_register_bits { EDB_MASTER_EN = 0x00002000 }; -enum sis900_eeprom_access_reigster_bits { +enum sis900_eeprom_access_register_bits { MDC = 0x00000040, MDDIR = 0x00000020, MDIO = 0x00000010, /* 7016 specific */ EECS = 0x00000008, EECLK = 0x00000004, EEDO = 0x00000002, EEDI = 0x00000001 @@ -73,7 +73,7 @@ enum sis900_interrupt_register_bits { RxERR = 0x00000004, RxDESC = 0x00000002, RxOK = 0x00000001 }; -enum sis900_interrupt_enable_reigster_bits { +enum sis900_interrupt_enable_register_bits { IE = 0x00000001 }; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 08c483bd2ec7..3f20bb1fe570 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -73,7 +73,7 @@ #define MMC_RX_OCTETCOUNT_G 0x00000188 #define MMC_RX_BROADCASTFRAME_G 0x0000018c #define MMC_RX_MULTICASTFRAME_G 0x00000190 -#define MMC_RX_CRC_ERRROR 0x00000194 +#define MMC_RX_CRC_ERROR 0x00000194 #define MMC_RX_ALIGN_ERROR 0x00000198 #define MMC_RX_RUN_ERROR 0x0000019C #define MMC_RX_JABBER_ERROR 0x000001A0 @@ -196,7 +196,7 @@ void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc) mmc->mmc_rx_octetcount_g += readl(ioaddr + MMC_RX_OCTETCOUNT_G); mmc->mmc_rx_broadcastframe_g += readl(ioaddr + MMC_RX_BROADCASTFRAME_G); mmc->mmc_rx_multicastframe_g += readl(ioaddr + MMC_RX_MULTICASTFRAME_G); - mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERRROR); + mmc->mmc_rx_crc_error += readl(ioaddr + MMC_RX_CRC_ERROR); mmc->mmc_rx_align_error += readl(ioaddr + MMC_RX_ALIGN_ERROR); mmc->mmc_rx_run_error += readl(ioaddr + MMC_RX_RUN_ERROR); mmc->mmc_rx_jabber_error += readl(ioaddr + MMC_RX_JABBER_ERROR); diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig index 8b0b1d6aca72..2f1264b882b9 100644 --- a/drivers/net/ethernet/via/Kconfig +++ b/drivers/net/ethernet/via/Kconfig @@ -18,6 +18,7 @@ if NET_VENDOR_VIA config VIA_RHINE tristate "VIA Rhine support" depends on (PCI || OF_IRQ) + depends on HAS_DMA select CRC32 select MII ---help--- @@ -42,6 +43,7 @@ config VIA_RHINE_MMIO config VIA_VELOCITY tristate "VIA Velocity support" depends on (PCI || (OF_ADDRESS && OF_IRQ)) + depends on HAS_DMA select CRC32 select CRC_CCITT select MII diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 4dea85bfc545..6b701b3ded74 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -246,6 +246,13 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) pr_info_once("%s: %s PHY revision: 0x%02x, patch: %d\n", dev_name(&phydev->dev), phydev->drv->name, rev, patch); + /* Dummy read to a register to workaround an issue upon reset where the + * internal inverter may not allow the first MDIO transaction to pass + * the MDIO management controller and make us return 0xffff for such + * reads. + */ + phy_read(phydev, MII_BMSR); + switch (rev) { case 0xb0: ret = bcm7xxx_28nm_b0_afe_config_init(phydev); diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index fc7abc50b4f1..6a52a7f0fa0d 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -120,6 +120,48 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id, return 0; } +/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with + * their internal MDIO management controller making them fail to successfully + * be read from or written to for the first transaction. We insert a dummy + * BMSR read here to make sure that phy_get_device() and get_phy_id() can + * correctly read the PHY MII_PHYSID1/2 registers and successfully register a + * PHY device for this peripheral. + * + * Once the PHY driver is registered, we can workaround subsequent reads from + * there (e.g: during system-wide power management). + * + * bus->reset is invoked before mdiobus_scan during mdiobus_register and is + * therefore the right location to stick that workaround. Since we do not want + * to read from non-existing PHYs, we either use bus->phy_mask or do a manual + * Device Tree scan to limit the search area. + */ +static int unimac_mdio_reset(struct mii_bus *bus) +{ + struct device_node *np = bus->dev.of_node; + struct device_node *child; + u32 read_mask = 0; + int addr; + + if (!np) { + read_mask = ~bus->phy_mask; + } else { + for_each_available_child_of_node(np, child) { + addr = of_mdio_parse_addr(&bus->dev, child); + if (addr < 0) + continue; + + read_mask |= 1 << addr; + } + } + + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + if (read_mask & 1 << addr) + mdiobus_read(bus, addr, MII_BMSR); + } + + return 0; +} + static int unimac_mdio_probe(struct platform_device *pdev) { struct unimac_mdio_priv *priv; @@ -155,6 +197,7 @@ static int unimac_mdio_probe(struct platform_device *pdev) bus->parent = &pdev->dev; bus->read = unimac_mdio_read; bus->write = unimac_mdio_write; + bus->reset = unimac_mdio_reset; snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index bdfe51fc3a65..0302483de240 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -230,7 +230,7 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id, for (i = 1; i < num_ids && c45_ids->devices_in_package == 0; i++) { - reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2; +retry: reg_addr = MII_ADDR_C45 | i << 16 | MDIO_DEVS2; phy_reg = mdiobus_read(bus, addr, reg_addr); if (phy_reg < 0) return -EIO; @@ -242,12 +242,20 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id, return -EIO; c45_ids->devices_in_package |= (phy_reg & 0xffff); - /* If mostly Fs, there is no device there, - * let's get out of here. - */ if ((c45_ids->devices_in_package & 0x1fffffff) == 0x1fffffff) { - *phy_id = 0xffffffff; - return 0; + if (i) { + /* If mostly Fs, there is no device there, + * then let's continue to probe more, as some + * 10G PHYs have zero Devices In package, + * e.g. Cortina CS4315/CS4340 PHY. + */ + i = 0; + goto retry; + } else { + /* no device there, let's get out of here */ + *phy_id = 0xffffffff; + return 0; + } } } @@ -796,10 +804,11 @@ static int genphy_config_advert(struct phy_device *phydev) if (phydev->supported & (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) { adv |= ethtool_adv_to_mii_ctrl1000_t(advertise); - if (adv != oldadv) - changed = 1; } + if (adv != oldadv) + changed = 1; + err = phy_write(phydev, MII_CTRL1000, adv); if (err < 0) return err; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 76cad712ddb2..17cad185169d 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -66,6 +66,7 @@ #define PHY_ID_VSC8244 0x000fc6c0 #define PHY_ID_VSC8514 0x00070670 #define PHY_ID_VSC8574 0x000704a0 +#define PHY_ID_VSC8641 0x00070431 #define PHY_ID_VSC8662 0x00070660 #define PHY_ID_VSC8221 0x000fc550 #define PHY_ID_VSC8211 0x000fc4b0 @@ -272,6 +273,18 @@ static struct phy_driver vsc82xx_driver[] = { .config_intr = &vsc82xx_config_intr, .driver = { .owner = THIS_MODULE,}, }, { + .phy_id = PHY_ID_VSC8641, + .name = "Vitesse VSC8641", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { .phy_id = PHY_ID_VSC8662, .name = "Vitesse VSC8662", .phy_id_mask = 0x000ffff0, @@ -318,6 +331,7 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = { { PHY_ID_VSC8244, 0x000fffc0 }, { PHY_ID_VSC8514, 0x000ffff0 }, { PHY_ID_VSC8574, 0x000ffff0 }, + { PHY_ID_VSC8641, 0x000ffff0 }, { PHY_ID_VSC8662, 0x000ffff0 }, { PHY_ID_VSC8221, 0x000ffff0 }, { PHY_ID_VSC8211, 0x000ffff0 }, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index e9f1075f7d4c..2652245631d1 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.3.5.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.2.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01030500 +#define VMXNET3_DRIVER_VERSION_NUM 0x01040200 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index b9febab89167..6ca6193ab8a6 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -62,7 +62,7 @@ static int mtu_max_set(const char *val, const struct kernel_param *kp) return ret; } -static struct kernel_param_ops mtu_max_ops = { +static const struct kernel_param_ops mtu_max_ops = { .set = mtu_max_set, .get = param_get_uint, }; @@ -91,7 +91,7 @@ static int ring_order_set(const char *val, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops ring_order_ops = { +static const struct kernel_param_ops ring_order_ops = { .set = ring_order_set, .get = param_get_uint, }; diff --git a/drivers/net/wireless/libertas_tf/if_usb.c b/drivers/net/wireless/libertas_tf/if_usb.c index 1a20cee5febe..799a2efe5793 100644 --- a/drivers/net/wireless/libertas_tf/if_usb.c +++ b/drivers/net/wireless/libertas_tf/if_usb.c @@ -821,15 +821,15 @@ static int if_usb_prog_firmware(struct if_usb_card *cardp) lbtf_deb_enter(LBTF_DEB_USB); - kparam_block_sysfs_write(fw_name); + kernel_param_lock(THIS_MODULE); ret = request_firmware(&cardp->fw, lbtf_fw_name, &cardp->udev->dev); if (ret < 0) { pr_err("request_firmware() failed with %#x\n", ret); pr_err("firmware %s not found\n", lbtf_fw_name); - kparam_unblock_sysfs_write(fw_name); + kernel_param_unlock(THIS_MODULE); goto done; } - kparam_unblock_sysfs_write(fw_name); + kernel_param_unlock(THIS_MODULE); if (check_fwfile_format(cardp->fw->data, cardp->fw->size)) goto release_fw; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 5485f91294e7..880d0d63e872 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -44,9 +44,9 @@ #include <xen/xen.h> #include <xen/events.h> #include <xen/interface/memory.h> +#include <xen/page.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> /* Provide an option to disable split event channels at load time as * event channels are limited resource. Split event channels are diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 56d8afd11077..f948c46d5132 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -45,7 +45,6 @@ #include <linux/slab.h> #include <net/ip.h> -#include <asm/xen/page.h> #include <xen/xen.h> #include <xen/xenbus.h> #include <xen/events.h> @@ -1245,10 +1244,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) np = netdev_priv(netdev); np->xbdev = dev; - /* No need to use rtnl_lock() before the call below as it - * happens before register_netdev(). - */ - netif_set_real_num_tx_queues(netdev, 0); np->queues = NULL; err = -ENOMEM; @@ -1900,9 +1895,6 @@ abort_transaction_no_dev_fatal: xennet_disconnect_backend(info); kfree(info->queues); info->queues = NULL; - rtnl_lock(); - netif_set_real_num_tx_queues(info->netdev, 0); - rtnl_unlock(); out: return err; } diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c index b75d684aefcd..734da589cdfb 100644 --- a/drivers/pci/host/pci-keystone.c +++ b/drivers/pci/host/pci-keystone.c @@ -221,10 +221,9 @@ static void ks_pcie_setup_interrupts(struct keystone_pcie *ks_pcie) /* MSI IRQ */ if (IS_ENABLED(CONFIG_PCI_MSI)) { for (i = 0; i < ks_pcie->num_msi_host_irqs; i++) { - irq_set_chained_handler(ks_pcie->msi_host_irqs[i], - ks_pcie_msi_irq_handler); - irq_set_handler_data(ks_pcie->msi_host_irqs[i], - ks_pcie); + irq_set_chained_handler_and_data(ks_pcie->msi_host_irqs[i], + ks_pcie_msi_irq_handler, + ks_pcie); } } } diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 240f38872085..8b7a900cd28b 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -20,6 +20,7 @@ #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/time.h> +#include <linux/ktime.h> #include <xen/platform_pci.h> #include <asm/xen/swiotlb-xen.h> @@ -115,7 +116,6 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) evtchn_port_t port = pdev->evtchn; unsigned irq = pdev->irq; s64 ns, ns_timeout; - struct timeval tv; spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); @@ -132,8 +132,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) * (in the latter case we end up continually re-executing poll() with a * timeout in the past). 1s difference gives plenty of slack for error. */ - do_gettimeofday(&tv); - ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; + ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC; xen_clear_irq_pending(irq); @@ -141,8 +140,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) (unsigned long *)&pdev->sh_info->flags)) { xen_poll_irq_timeout(irq, jiffies + 3*HZ); xen_clear_irq_pending(irq); - do_gettimeofday(&tv); - ns = timeval_to_ns(&tv); + ns = ktime_get_ns(); if (ns > ns_timeout) { dev_err(&pdev->xdev->dev, "pciback not responding!!!\n"); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index c4fc77aa766e..ad1ea1695b4a 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1351,8 +1351,7 @@ int mtk_pctrl_init(struct platform_device *pdev, set_irq_flags(virq, IRQF_VALID); }; - irq_set_chained_handler(irq, mtk_eint_irq_handler); - irq_set_handler_data(irq, pctl); + irq_set_chained_handler_and_data(irq, mtk_eint_irq_handler, pctl); set_irq_flags(irq, IRQF_VALID); return 0; diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index 873433da0f2c..c3c3d2345fc6 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -865,8 +865,8 @@ static int adi_gpio_pint_probe(struct platform_device *pdev) pint->pint_map_port = adi_pint_map_port; platform_set_drvdata(pdev, pint); - irq_set_chained_handler(pint->irq, adi_gpio_handle_pint_irq); - irq_set_handler_data(pint->irq, pint); + irq_set_chained_handler_and_data(pint->irq, adi_gpio_handle_pint_irq, + pint); list_add_tail(&pint->node, &adi_pint_list); diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index d34ac879af9e..c262e5f35c28 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1661,8 +1661,8 @@ static int st_pctl_probe_dt(struct platform_device *pdev, if (IS_ERR(info->irqmux_base)) return PTR_ERR(info->irqmux_base); - irq_set_chained_handler(irq, st_gpio_irqmux_handler); - irq_set_handler_data(irq, info); + irq_set_chained_handler_and_data(irq, st_gpio_irqmux_handler, + info); } diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 0b7afa50121a..b18dabba03a4 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -563,8 +563,8 @@ static int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) return -ENOMEM; } - irq_set_chained_handler(irq, exynos_irq_demux_eint16_31); - irq_set_handler_data(irq, muxed_data); + irq_set_chained_handler_and_data(irq, exynos_irq_demux_eint16_31, + muxed_data); bank = d->pin_banks; idx = 0; diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index f1993f42114c..01b43dbfb795 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -514,8 +514,7 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d) } eint_data->parents[i] = irq; - irq_set_chained_handler(irq, handlers[i]); - irq_set_handler_data(irq, eint_data); + irq_set_chained_handler_and_data(irq, handlers[i], eint_data); } bank = d->pin_banks; diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c index 7756c1e9e763..ec8cc3b47621 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c @@ -506,8 +506,7 @@ static int s3c64xx_eint_gpio_init(struct samsung_pinctrl_drv_data *d) data->domains[nr_domains++] = bank->irq_domain; } - irq_set_chained_handler(d->irq, s3c64xx_eint_gpio_irq); - irq_set_handler_data(d->irq, data); + irq_set_chained_handler_and_data(d->irq, s3c64xx_eint_gpio_irq, data); return 0; } @@ -731,8 +730,9 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) return -ENXIO; } - irq_set_chained_handler(irq, s3c64xx_eint0_handlers[i]); - irq_set_handler_data(irq, data); + irq_set_chained_handler_and_data(irq, + s3c64xx_eint0_handlers[i], + data); } bank = d->pin_banks; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index d7857c72e627..f09573e13203 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1005,9 +1005,9 @@ int sunxi_pinctrl_init(struct platform_device *pdev, writel(0xffffffff, pctl->membase + sunxi_irq_status_reg_from_bank(i)); - irq_set_chained_handler(pctl->irq[i], - sunxi_pinctrl_irq_handler); - irq_set_handler_data(pctl->irq[i], pctl); + irq_set_chained_handler_and_data(pctl->irq[i], + sunxi_pinctrl_irq_handler, + pctl); } dev_info(&pdev->dev, "initialized sunXi PIO driver\n"); diff --git a/drivers/power/test_power.c b/drivers/power/test_power.c index f986e0cca7ac..83c42ea88f2b 100644 --- a/drivers/power/test_power.c +++ b/drivers/power/test_power.c @@ -448,42 +448,42 @@ static int param_set_battery_voltage(const char *key, #define param_get_battery_voltage param_get_int -static struct kernel_param_ops param_ops_ac_online = { +static const struct kernel_param_ops param_ops_ac_online = { .set = param_set_ac_online, .get = param_get_ac_online, }; -static struct kernel_param_ops param_ops_usb_online = { +static const struct kernel_param_ops param_ops_usb_online = { .set = param_set_usb_online, .get = param_get_usb_online, }; -static struct kernel_param_ops param_ops_battery_status = { +static const struct kernel_param_ops param_ops_battery_status = { .set = param_set_battery_status, .get = param_get_battery_status, }; -static struct kernel_param_ops param_ops_battery_present = { +static const struct kernel_param_ops param_ops_battery_present = { .set = param_set_battery_present, .get = param_get_battery_present, }; -static struct kernel_param_ops param_ops_battery_technology = { +static const struct kernel_param_ops param_ops_battery_technology = { .set = param_set_battery_technology, .get = param_get_battery_technology, }; -static struct kernel_param_ops param_ops_battery_health = { +static const struct kernel_param_ops param_ops_battery_health = { .set = param_set_battery_health, .get = param_get_battery_health, }; -static struct kernel_param_ops param_ops_battery_capacity = { +static const struct kernel_param_ops param_ops_battery_capacity = { .set = param_set_battery_capacity, .get = param_get_battery_capacity, }; -static struct kernel_param_ops param_ops_battery_voltage = { +static const struct kernel_param_ops param_ops_battery_voltage = { .set = param_set_battery_voltage, .get = param_get_battery_voltage, }; diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index f74c040d5c10..e9485fbbb373 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -92,8 +92,8 @@ struct read_cpu_info_sccb { u8 reserved[4096 - 16]; } __attribute__((packed, aligned(PAGE_SIZE))); -static void sclp_fill_cpu_info(struct sclp_cpu_info *info, - struct read_cpu_info_sccb *sccb) +static void sclp_fill_core_info(struct sclp_core_info *info, + struct read_cpu_info_sccb *sccb) { char *page = (char *) sccb; @@ -101,12 +101,11 @@ static void sclp_fill_cpu_info(struct sclp_cpu_info *info, info->configured = sccb->nr_configured; info->standby = sccb->nr_standby; info->combined = sccb->nr_configured + sccb->nr_standby; - info->has_cpu_type = sclp.has_cpu_type; - memcpy(&info->cpu, page + sccb->offset_configured, - info->combined * sizeof(struct sclp_cpu_entry)); + memcpy(&info->core, page + sccb->offset_configured, + info->combined * sizeof(struct sclp_core_entry)); } -int sclp_get_cpu_info(struct sclp_cpu_info *info) +int sclp_get_core_info(struct sclp_core_info *info) { int rc; struct read_cpu_info_sccb *sccb; @@ -127,7 +126,7 @@ int sclp_get_cpu_info(struct sclp_cpu_info *info) rc = -EIO; goto out; } - sclp_fill_cpu_info(info, sccb); + sclp_fill_core_info(info, sccb); out: free_page((unsigned long) sccb); return rc; @@ -137,7 +136,7 @@ struct cpu_configure_sccb { struct sccb_header header; } __attribute__((packed, aligned(8))); -static int do_cpu_configure(sclp_cmdw_t cmd) +static int do_core_configure(sclp_cmdw_t cmd) { struct cpu_configure_sccb *sccb; int rc; @@ -171,14 +170,14 @@ out: return rc; } -int sclp_cpu_configure(u8 cpu) +int sclp_core_configure(u8 core) { - return do_cpu_configure(SCLP_CMDW_CONFIGURE_CPU | cpu << 8); + return do_core_configure(SCLP_CMDW_CONFIGURE_CPU | core << 8); } -int sclp_cpu_deconfigure(u8 cpu) +int sclp_core_deconfigure(u8 core) { - return do_cpu_configure(SCLP_CMDW_DECONFIGURE_CPU | cpu << 8); + return do_core_configure(SCLP_CMDW_DECONFIGURE_CPU | core << 8); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index d7f696d95597..aeed7969fd79 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -98,7 +98,7 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb) static void __init sclp_facilities_detect(struct read_info_sccb *sccb) { - struct sclp_cpu_entry *cpue; + struct sclp_core_entry *cpue; u16 boot_cpu_address, cpu; if (sclp_read_info_early(sccb)) @@ -106,7 +106,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); - sclp.has_cpu_type = !!(sccb->fac84 & 0x01); + sclp.has_core_type = !!(sccb->fac84 & 0x01); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; @@ -116,11 +116,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) if (!sccb->hcpua) { if (MACHINE_IS_VM) - sclp.max_cpu = 64; + sclp.max_cores = 64; else - sclp.max_cpu = sccb->ncpurl; + sclp.max_cores = sccb->ncpurl; } else { - sclp.max_cpu = sccb->hcpua + 1; + sclp.max_cores = sccb->hcpua + 1; } boot_cpu_address = stap(); diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 9a3dd95029cc..823f41fc4bbd 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -154,7 +154,7 @@ static int __init init_cpu_info(enum arch_id arch) /* get info for boot cpu from lowcore, stored in the HSA */ - sa_ext = dump_save_area_create(0); + sa_ext = dump_save_areas.areas[0]; if (!sa_ext) return -ENOMEM; if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base, diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 3ba611419759..559a9dcdb15d 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -60,7 +60,7 @@ static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags); static int ap_device_remove(struct device *dev); static int ap_device_probe(struct device *dev); static void ap_interrupt_handler(struct airq_struct *airq); -static void ap_reset(struct ap_device *ap_dev); +static void ap_reset(struct ap_device *ap_dev, unsigned long *flags); static void ap_config_timeout(unsigned long ptr); static int ap_select_domain(void); static void ap_query_configuration(void); @@ -310,35 +310,26 @@ static inline int __ap_query_configuration(struct ap_config_info *config) static int ap_query_functions(ap_qid_t qid, unsigned int *functions) { struct ap_queue_status status; - int i; + status = __ap_query_functions(qid, functions); - for (i = 0; i < AP_MAX_RESET; i++) { - if (ap_queue_status_invalid_test(&status)) - return -ENODEV; + if (ap_queue_status_invalid_test(&status)) + return -ENODEV; - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - return 0; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_BUSY: - break; - case AP_RESPONSE_Q_NOT_AVAIL: - case AP_RESPONSE_DECONFIGURED: - case AP_RESPONSE_CHECKSTOPPED: - case AP_RESPONSE_INVALID_ADDRESS: - return -ENODEV; - case AP_RESPONSE_OTHERWISE_CHANGED: - break; - default: - break; - } - if (i < AP_MAX_RESET - 1) { - udelay(5); - status = __ap_query_functions(qid, functions); - } + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + return 0; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + case AP_RESPONSE_INVALID_ADDRESS: + return -ENODEV; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + case AP_RESPONSE_OTHERWISE_CHANGED: + default: + return -EBUSY; } - return -EBUSY; } /** @@ -350,47 +341,25 @@ static int ap_query_functions(ap_qid_t qid, unsigned int *functions) * on the return value it waits a while and tests the AP queue if interrupts * have been switched on using ap_test_queue(). */ -static int ap_queue_enable_interruption(ap_qid_t qid, void *ind) +static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind) { struct ap_queue_status status; - int t_depth, t_device_type, rc, i; - rc = -EBUSY; - status = ap_queue_interruption_control(qid, ind); - - for (i = 0; i < AP_MAX_RESET; i++) { - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - if (status.int_enabled) - return 0; - break; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_BUSY: - if (i < AP_MAX_RESET - 1) { - udelay(5); - status = ap_queue_interruption_control(qid, - ind); - continue; - } - break; - case AP_RESPONSE_Q_NOT_AVAIL: - case AP_RESPONSE_DECONFIGURED: - case AP_RESPONSE_CHECKSTOPPED: - case AP_RESPONSE_INVALID_ADDRESS: - return -ENODEV; - case AP_RESPONSE_OTHERWISE_CHANGED: - if (status.int_enabled) - return 0; - break; - default: - break; - } - if (i < AP_MAX_RESET - 1) { - udelay(5); - status = ap_test_queue(qid, &t_depth, &t_device_type); - } + status = ap_queue_interruption_control(ap_dev->qid, ind); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + case AP_RESPONSE_OTHERWISE_CHANGED: + return 0; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + case AP_RESPONSE_INVALID_ADDRESS: + return -ENODEV; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + default: + return -EBUSY; } - return rc; } /** @@ -511,109 +480,94 @@ int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) EXPORT_SYMBOL(ap_recv); /** + * __ap_schedule_poll_timer(): Schedule poll timer. + * + * Set up the timer to run the poll tasklet + */ +static inline void __ap_schedule_poll_timer(void) +{ + ktime_t hr_time; + + spin_lock_bh(&ap_poll_timer_lock); + if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) { + hr_time = ktime_set(0, poll_timeout); + hrtimer_forward_now(&ap_poll_timer, hr_time); + hrtimer_restart(&ap_poll_timer); + } + spin_unlock_bh(&ap_poll_timer_lock); +} + +/** + * ap_schedule_poll_timer(): Schedule poll timer. + * + * Set up the timer to run the poll tasklet + */ +static inline void ap_schedule_poll_timer(void) +{ + if (ap_using_interrupts()) + return; + __ap_schedule_poll_timer(); +} + + +/** * ap_query_queue(): Check if an AP queue is available. * @qid: The AP queue number * @queue_depth: Pointer to queue depth value * @device_type: Pointer to device type value - * - * The test is repeated for AP_MAX_RESET times. */ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type) { struct ap_queue_status status; - int t_depth, t_device_type, rc, i; + int t_depth, t_device_type; - rc = -EBUSY; - for (i = 0; i < AP_MAX_RESET; i++) { - status = ap_test_queue(qid, &t_depth, &t_device_type); - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - *queue_depth = t_depth + 1; - *device_type = t_device_type; - rc = 0; - break; - case AP_RESPONSE_Q_NOT_AVAIL: - rc = -ENODEV; - break; - case AP_RESPONSE_RESET_IN_PROGRESS: - break; - case AP_RESPONSE_DECONFIGURED: - rc = -ENODEV; - break; - case AP_RESPONSE_CHECKSTOPPED: - rc = -ENODEV; - break; - case AP_RESPONSE_INVALID_ADDRESS: - rc = -ENODEV; - break; - case AP_RESPONSE_OTHERWISE_CHANGED: - break; - case AP_RESPONSE_BUSY: - break; - default: - BUG(); - } - if (rc != -EBUSY) - break; - if (i < AP_MAX_RESET - 1) - udelay(5); + status = ap_test_queue(qid, &t_depth, &t_device_type); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + *queue_depth = t_depth + 1; + *device_type = t_device_type; + return 0; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + case AP_RESPONSE_INVALID_ADDRESS: + return -ENODEV; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_OTHERWISE_CHANGED: + case AP_RESPONSE_BUSY: + return -EBUSY; + default: + BUG(); } - return rc; } /** * ap_init_queue(): Reset an AP queue. * @qid: The AP queue number * - * Reset an AP queue and wait for it to become available again. + * Submit the Reset command to an AP queue. + * Since the reset is asynchron set the state to 'RESET_IN_PROGRESS' + * and check later via ap_poll_queue() if the reset is done. */ -static int ap_init_queue(ap_qid_t qid) +static int ap_init_queue(struct ap_device *ap_dev) { struct ap_queue_status status; - int rc, dummy, i; - rc = -ENODEV; - status = ap_reset_queue(qid); - for (i = 0; i < AP_MAX_RESET; i++) { - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - if (status.queue_empty) - rc = 0; - break; - case AP_RESPONSE_Q_NOT_AVAIL: - case AP_RESPONSE_DECONFIGURED: - case AP_RESPONSE_CHECKSTOPPED: - i = AP_MAX_RESET; /* return with -ENODEV */ - break; - case AP_RESPONSE_RESET_IN_PROGRESS: - rc = -EBUSY; - case AP_RESPONSE_BUSY: - default: - break; - } - if (rc != -ENODEV && rc != -EBUSY) - break; - if (i < AP_MAX_RESET - 1) { - /* Time we are waiting until we give up (0.7sec * 90). - * Since the actual request (in progress) will not - * interrupted immediately for the reset command, - * we have to be patient. In worst case we have to - * wait 60sec + reset time (some msec). - */ - schedule_timeout(AP_RESET_TIMEOUT); - status = ap_test_queue(qid, &dummy, &dummy); - } - } - if (rc == 0 && ap_using_interrupts()) { - rc = ap_queue_enable_interruption(qid, ap_airq.lsi_ptr); - /* If interruption mode is supported by the machine, - * but an AP can not be enabled for interruption then - * the AP will be discarded. */ - if (rc) - pr_err("Registering adapter interrupts for " - "AP %d failed\n", AP_QID_DEVICE(qid)); + status = ap_reset_queue(ap_dev->qid); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + ap_dev->interrupt = AP_INTR_DISABLED; + ap_dev->reset = AP_RESET_IN_PROGRESS; + return 0; + case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_BUSY: + return -EBUSY; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + default: + return -ENODEV; } - return rc; } /** @@ -729,10 +683,63 @@ static ssize_t ap_pendingq_count_show(struct device *dev, static DEVICE_ATTR(pendingq_count, 0444, ap_pendingq_count_show, NULL); +static ssize_t ap_reset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int rc = 0; + + spin_lock_bh(&ap_dev->lock); + switch (ap_dev->reset) { + case AP_RESET_IGNORE: + rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n"); + break; + case AP_RESET_ARMED: + rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n"); + break; + case AP_RESET_DO: + rc = snprintf(buf, PAGE_SIZE, "Reset Timer expired.\n"); + break; + case AP_RESET_IN_PROGRESS: + rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n"); + break; + default: + break; + } + spin_unlock_bh(&ap_dev->lock); + return rc; +} + +static DEVICE_ATTR(reset, 0444, ap_reset_show, NULL); + +static ssize_t ap_interrupt_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + int rc = 0; + + spin_lock_bh(&ap_dev->lock); + switch (ap_dev->interrupt) { + case AP_INTR_DISABLED: + rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n"); + break; + case AP_INTR_ENABLED: + rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n"); + break; + case AP_INTR_IN_PROGRESS: + rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n"); + break; + } + spin_unlock_bh(&ap_dev->lock); + return rc; +} + +static DEVICE_ATTR(interrupt, 0444, ap_interrupt_show, NULL); + static ssize_t ap_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "ap:t%02X", to_ap_dev(dev)->device_type); + return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type); } static DEVICE_ATTR(modalias, 0444, ap_modalias_show, NULL); @@ -753,6 +760,8 @@ static struct attribute *ap_dev_attrs[] = { &dev_attr_request_count.attr, &dev_attr_requestq_count.attr, &dev_attr_pendingq_count.attr, + &dev_attr_reset.attr, + &dev_attr_interrupt.attr, &dev_attr_modalias.attr, &dev_attr_ap_functions.attr, NULL @@ -926,6 +935,10 @@ static int ap_device_probe(struct device *dev) spin_lock_bh(&ap_device_list_lock); list_del_init(&ap_dev->list); spin_unlock_bh(&ap_device_list_lock); + } else { + if (ap_dev->reset == AP_RESET_IN_PROGRESS || + ap_dev->interrupt == AP_INTR_IN_PROGRESS) + __ap_schedule_poll_timer(); } return rc; } @@ -1411,7 +1424,7 @@ static void ap_scan_bus(struct work_struct *unused) struct ap_device *ap_dev; struct device *dev; ap_qid_t qid; - int queue_depth, device_type; + int queue_depth = 0, device_type = 0; unsigned int device_functions; int rc, i; @@ -1429,15 +1442,9 @@ static void ap_scan_bus(struct work_struct *unused) else rc = -ENODEV; if (dev) { - if (rc == -EBUSY) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(AP_RESET_TIMEOUT); - rc = ap_query_queue(qid, &queue_depth, - &device_type); - } ap_dev = to_ap_dev(dev); spin_lock_bh(&ap_dev->lock); - if (rc || ap_dev->unregistered) { + if (rc == -ENODEV || ap_dev->unregistered) { spin_unlock_bh(&ap_dev->lock); if (ap_dev->unregistered) i--; @@ -1451,13 +1458,15 @@ static void ap_scan_bus(struct work_struct *unused) } if (rc) continue; - rc = ap_init_queue(qid); - if (rc) - continue; ap_dev = kzalloc(sizeof(*ap_dev), GFP_KERNEL); if (!ap_dev) break; ap_dev->qid = qid; + rc = ap_init_queue(ap_dev); + if ((rc != 0) && (rc != -EBUSY)) { + kfree(ap_dev); + continue; + } ap_dev->queue_depth = queue_depth; ap_dev->unregistered = 1; spin_lock_init(&ap_dev->lock); @@ -1520,36 +1529,6 @@ ap_config_timeout(unsigned long ptr) } /** - * __ap_schedule_poll_timer(): Schedule poll timer. - * - * Set up the timer to run the poll tasklet - */ -static inline void __ap_schedule_poll_timer(void) -{ - ktime_t hr_time; - - spin_lock_bh(&ap_poll_timer_lock); - if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) { - hr_time = ktime_set(0, poll_timeout); - hrtimer_forward_now(&ap_poll_timer, hr_time); - hrtimer_restart(&ap_poll_timer); - } - spin_unlock_bh(&ap_poll_timer_lock); -} - -/** - * ap_schedule_poll_timer(): Schedule poll timer. - * - * Set up the timer to run the poll tasklet - */ -static inline void ap_schedule_poll_timer(void) -{ - if (ap_using_interrupts()) - return; - __ap_schedule_poll_timer(); -} - -/** * ap_poll_read(): Receive pending reply messages from an AP device. * @ap_dev: pointer to the AP device * @flags: pointer to control flags, bit 2^0 is set if another poll is @@ -1568,6 +1547,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags) ap_dev->reply->message, ap_dev->reply->length); switch (status.response_code) { case AP_RESPONSE_NORMAL: + ap_dev->interrupt = status.int_enabled; atomic_dec(&ap_poll_requests); ap_decrease_queue_count(ap_dev); list_for_each_entry(ap_msg, &ap_dev->pendingq, list) { @@ -1582,6 +1562,7 @@ static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags) *flags |= 1; break; case AP_RESPONSE_NO_PENDING_REPLY: + ap_dev->interrupt = status.int_enabled; if (status.queue_empty) { /* The card shouldn't forget requests but who knows. */ atomic_sub(ap_dev->queue_count, &ap_poll_requests); @@ -1612,7 +1593,8 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags) struct ap_message *ap_msg; if (ap_dev->requestq_count <= 0 || - ap_dev->queue_count >= ap_dev->queue_depth) + (ap_dev->queue_count >= ap_dev->queue_depth) || + (ap_dev->reset == AP_RESET_IN_PROGRESS)) return 0; /* Start the next request on the queue. */ ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list); @@ -1646,6 +1628,8 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags) /** * ap_poll_queue(): Poll AP device for pending replies and send new messages. + * Check if the queue has a pending reset. In case it's done re-enable + * interrupts, otherwise reschedule the poll_timer for another attempt. * @ap_dev: pointer to the bus device * @flags: pointer to control flags, bit 2^0 is set if another poll is * required, bit 2^1 is set if the poll timer needs to get armed @@ -1656,7 +1640,51 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags) */ static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags) { - int rc; + int rc, depth, type; + struct ap_queue_status status; + + + if (ap_dev->reset == AP_RESET_IN_PROGRESS) { + status = ap_test_queue(ap_dev->qid, &depth, &type); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + ap_dev->reset = AP_RESET_IGNORE; + if (ap_using_interrupts()) { + rc = ap_queue_enable_interruption( + ap_dev, ap_airq.lsi_ptr); + if (!rc) + ap_dev->interrupt = AP_INTR_IN_PROGRESS; + else if (rc == -ENODEV) { + pr_err("Registering adapter interrupts for " + "AP %d failed\n", AP_QID_DEVICE(ap_dev->qid)); + return rc; + } + } + /* fall through */ + case AP_RESPONSE_BUSY: + case AP_RESPONSE_RESET_IN_PROGRESS: + *flags |= AP_POLL_AFTER_TIMEOUT; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + return -ENODEV; + default: + break; + } + } + + if ((ap_dev->reset != AP_RESET_IN_PROGRESS) && + (ap_dev->interrupt == AP_INTR_IN_PROGRESS)) { + status = ap_test_queue(ap_dev->qid, &depth, &type); + if (ap_using_interrupts()) { + if (status.int_enabled == 1) + ap_dev->interrupt = AP_INTR_ENABLED; + else + *flags |= AP_POLL_AFTER_TIMEOUT; + } else + ap_dev->interrupt = AP_INTR_DISABLED; + } rc = ap_poll_read(ap_dev, flags); if (rc) @@ -1676,7 +1704,8 @@ static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_ms struct ap_queue_status status; if (list_empty(&ap_dev->requestq) && - ap_dev->queue_count < ap_dev->queue_depth) { + (ap_dev->queue_count < ap_dev->queue_depth) && + (ap_dev->reset != AP_RESET_IN_PROGRESS)) { status = __ap_send(ap_dev->qid, ap_msg->psmid, ap_msg->message, ap_msg->length, ap_msg->special); @@ -1789,21 +1818,20 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) * Reset a not responding AP device and move all requests from the * pending queue to the request queue. */ -static void ap_reset(struct ap_device *ap_dev) +static void ap_reset(struct ap_device *ap_dev, unsigned long *flags) { int rc; - ap_dev->reset = AP_RESET_IGNORE; atomic_sub(ap_dev->queue_count, &ap_poll_requests); ap_dev->queue_count = 0; list_splice_init(&ap_dev->pendingq, &ap_dev->requestq); ap_dev->requestq_count += ap_dev->pendingq_count; ap_dev->pendingq_count = 0; - rc = ap_init_queue(ap_dev->qid); + rc = ap_init_queue(ap_dev); if (rc == -ENODEV) ap_dev->unregistered = 1; else - __ap_schedule_poll_timer(); + *flags |= AP_POLL_AFTER_TIMEOUT; } static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags) @@ -1812,7 +1840,7 @@ static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags) if (ap_poll_queue(ap_dev, flags)) ap_dev->unregistered = 1; if (ap_dev->reset == AP_RESET_DO) - ap_reset(ap_dev); + ap_reset(ap_dev, flags); } return 0; } @@ -1845,9 +1873,9 @@ static void ap_poll_all(unsigned long dummy) spin_unlock(&ap_dev->lock); } spin_unlock(&ap_device_list_lock); - } while (flags & 1); - if (flags & 2) - ap_schedule_poll_timer(); + } while (flags & AP_POLL_IMMEDIATELY); + if (flags & AP_POLL_AFTER_TIMEOUT) + __ap_schedule_poll_timer(); } /** diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 2737d261a324..00468c8d0781 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -32,11 +32,13 @@ #define AP_DEVICES 64 /* Number of AP devices. */ #define AP_DOMAINS 256 /* Number of AP domains. */ -#define AP_MAX_RESET 90 /* Maximum number of resets. */ #define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */ #define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ #define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ +#define AP_POLL_IMMEDIATELY 1 /* continue running poll tasklet */ +#define AP_POLL_AFTER_TIMEOUT 2 /* run poll tasklet again after timout */ + extern int ap_domain_index; /** @@ -135,6 +137,14 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_RESET_IGNORE 0 /* request timeout will be ignored */ #define AP_RESET_ARMED 1 /* request timeout timer is active */ #define AP_RESET_DO 2 /* AP reset required */ +#define AP_RESET_IN_PROGRESS 3 /* AP reset in progress */ + +/* + * AP interrupt states + */ +#define AP_INTR_DISABLED 0 /* AP interrupt disabled */ +#define AP_INTR_ENABLED 1 /* AP interrupt enabled */ +#define AP_INTR_IN_PROGRESS 3 /* AP interrupt in progress */ struct ap_device; struct ap_message; @@ -168,6 +178,7 @@ struct ap_device { struct timer_list timeout; /* Timer for request timeouts. */ int reset; /* Reset required after req. timeout. */ + int interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ struct list_head pendingq; /* List of message sent to AP queue. */ diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 71e698b85772..bb3908818505 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -39,7 +39,7 @@ * But the maximum time limit managed by the stomper code is set to 60sec. * Hence we have to wait at least that time period. */ -#define CEX4_CLEANUP_TIME (61*HZ) +#define CEX4_CLEANUP_TIME (900*HZ) static struct ap_device_id zcrypt_cex4_ids[] = { { AP_DEVICE(AP_DEVICE_TYPE_CEX4) }, diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index 81f22980b2de..156b790072b4 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -366,8 +366,9 @@ int __init register_intc_controller(struct intc_desc *desc) /* redirect this interrupts to the first one */ irq_set_chip(irq2, &dummy_irq_chip); - irq_set_chained_handler(irq2, intc_redirect_irq); - irq_set_handler_data(irq2, (void *)irq); + irq_set_chained_handler_and_data(irq2, + intc_redirect_irq, + (void *)irq); } } diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c index f30ac9354ff2..f5f1b821241a 100644 --- a/drivers/sh/intc/virq.c +++ b/drivers/sh/intc/virq.c @@ -243,8 +243,9 @@ restart: */ irq_set_nothread(irq); - irq_set_chained_handler(entry->pirq, intc_virq_handler); + /* Set handler data before installing the handler */ add_virq_to_pirq(entry->pirq, irq); + irq_set_chained_handler(entry->pirq, intc_virq_handler); radix_tree_tag_clear(&d->tree, entry->enum_id, INTC_TAG_VIRQ_NEEDS_ALLOC); diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 2e6716104d3f..5820e8513927 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -119,7 +119,7 @@ exit: return ret; } -static struct kernel_param_ops duration_ops = { +static const struct kernel_param_ops duration_ops = { .set = duration_set, .get = param_get_int, }; @@ -167,7 +167,7 @@ exit_win: return ret; } -static struct kernel_param_ops window_size_ops = { +static const struct kernel_param_ops window_size_ops = { .set = window_size_set, .get = param_get_int, }; diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c index f78a87b07872..bb809cf36617 100644 --- a/drivers/tty/hvc/hvc_iucv.c +++ b/drivers/tty/hvc/hvc_iucv.c @@ -1345,7 +1345,7 @@ static int param_get_vmidfilter(char *buffer, const struct kernel_param *kp) #define param_check_vmidfilter(name, p) __param_check(name, p, void) -static struct kernel_param_ops param_ops_vmidfilter = { +static const struct kernel_param_ops param_ops_vmidfilter = { .set = param_set_vmidfilter, .get = param_get_vmidfilter, }; diff --git a/drivers/tty/hvc/hvc_tile.c b/drivers/tty/hvc/hvc_tile.c index 3f6cd3102db5..9da1e842bbe9 100644 --- a/drivers/tty/hvc/hvc_tile.c +++ b/drivers/tty/hvc/hvc_tile.c @@ -51,7 +51,8 @@ int tile_console_write(const char *buf, int count) _SIM_CONTROL_OPERATOR_BITS)); return 0; } else { - return hv_console_write((HV_VirtAddr)buf, count); + /* Translate 0 bytes written to EAGAIN for hvc_console_print. */ + return hv_console_write((HV_VirtAddr)buf, count) ?: -EAGAIN; } } diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 7a3d146a5f0e..a9d837f83ce8 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -302,7 +302,7 @@ static int xen_initial_domain_console_init(void) static void xen_console_update_evtchn(struct xencons_info *info) { if (xen_hvm_domain()) { - uint64_t v; + uint64_t v = 0; int err; err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 2847108cc8dd..b5b427888b24 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -988,7 +988,7 @@ static int sysrq_reset_seq_param_set(const char *buffer, return 0; } -static struct kernel_param_ops param_ops_sysrq_reset_seq = { +static const struct kernel_param_ops param_ops_sysrq_reset_seq = { .get = param_get_ushort, .set = sysrq_reset_seq_param_set, }; diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index 888998a7fe31..a2ae88dbda78 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1599,7 +1599,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver) char file_arr[] = "CMVxy.bin"; char *file; - kparam_block_sysfs_write(cmv_file); + kernel_param_lock(THIS_MODULE); /* set proper name corresponding modem version and line type */ if (cmv_file[sc->modem_index] == NULL) { if (UEA_CHIP_VERSION(sc) == ADI930) @@ -1618,7 +1618,7 @@ static void cmvs_file_name(struct uea_softc *sc, char *const cmv_name, int ver) strlcat(cmv_name, file, UEA_FW_NAME_MAX); if (ver == 2) strlcat(cmv_name, ".v2", UEA_FW_NAME_MAX); - kparam_unblock_sysfs_write(cmv_file); + kernel_param_unlock(THIS_MODULE); } static int request_cmvs_old(struct uea_softc *sc, diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c index d32d1c4d1b99..178ae93b7ebd 100644 --- a/drivers/video/fbdev/uvesafb.c +++ b/drivers/video/fbdev/uvesafb.c @@ -1977,7 +1977,7 @@ static int param_set_scroll(const char *val, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops param_ops_scroll = { +static const struct kernel_param_ops param_ops_scroll = { .set = param_set_scroll, }; #define param_check_scroll(name, p) __param_check(name, p, void) diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c index ea7f056ed5fe..8bac309c24b9 100644 --- a/drivers/video/fbdev/vt8623fb.c +++ b/drivers/video/fbdev/vt8623fb.c @@ -754,9 +754,9 @@ static int vt8623_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Prepare startup mode */ - kparam_block_sysfs_write(mode_option); + kernel_param_lock(THIS_MODULE); rc = fb_find_mode(&(info->var), info, mode_option, NULL, 0, NULL, 8); - kparam_unblock_sysfs_write(mode_option); + kernel_param_unlock(THIS_MODULE); if (! ((rc == 1) || (rc == 2))) { rc = -EINVAL; dev_err(info->device, "mode %s not found\n", mode_option); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 7a5e60dea6c5..10189b5b627f 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -691,7 +691,7 @@ static int vm_cmdline_get(char *buffer, const struct kernel_param *kp) return strlen(buffer) + 1; } -static struct kernel_param_ops vm_cmdline_param_ops = { +static const struct kernel_param_ops vm_cmdline_param_ops = { .set = vm_cmdline_set, .get = vm_cmdline_get, }; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 38387950490e..96093ae369a5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -39,8 +39,8 @@ #include <asm/irq.h> #include <asm/idle.h> #include <asm/io_apic.h> -#include <asm/xen/page.h> #include <asm/xen/pci.h> +#include <xen/page.h> #endif #include <asm/sync_bitops.h> #include <asm/xen/hypercall.h> diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 417415d738d0..ed673e1acd61 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -44,13 +44,13 @@ #include <asm/sync_bitops.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> -#include <asm/xen/page.h> #include <xen/xen.h> #include <xen/xen-ops.h> #include <xen/events.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> +#include <xen/page.h> #include "events_internal.h" diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 89274850741b..67b9163db718 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -41,9 +41,9 @@ #include <xen/balloon.h> #include <xen/gntdev.h> #include <xen/events.h> +#include <xen/page.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index b1c7170e5c9e..62f591f8763c 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -138,7 +138,6 @@ static struct gnttab_free_callback *gnttab_free_callback_list; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) -#define SPP (PAGE_SIZE / sizeof(grant_status_t)) static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) { diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 9e6a85104a20..d10effee9b9e 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -19,10 +19,10 @@ #include <xen/grant_table.h> #include <xen/events.h> #include <xen/hvc-console.h> +#include <xen/page.h> #include <xen/xen-ops.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> #include <asm/xen/hypervisor.h> enum shutdown_state { diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index d88f36754bf7..239738f944ba 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -17,8 +17,8 @@ #include <xen/xen.h> #include <xen/interface/xen.h> +#include <xen/page.h> #include <asm/xen/hypercall.h> -#include <asm/xen/page.h> #include <asm/xen/hypervisor.h> #include <xen/tmem.h> @@ -389,7 +389,7 @@ static int __init xen_tmem_init(void) } #endif #ifdef CONFIG_CLEANCACHE - BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); + BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && cleancache) { int err; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 96b2011d25f3..9ad327238ba9 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -37,7 +37,7 @@ #include <linux/vmalloc.h> #include <linux/export.h> #include <asm/xen/hypervisor.h> -#include <asm/xen/page.h> +#include <xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> #include <xen/balloon.h> @@ -379,16 +379,16 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, int i, j; for (i = 0; i < nr_pages; i++) { - unsigned long addr = (unsigned long)vaddr + - (PAGE_SIZE * i); err = gnttab_grant_foreign_access(dev->otherend_id, - virt_to_mfn(addr), 0); + virt_to_mfn(vaddr), 0); if (err < 0) { xenbus_dev_fatal(dev, err, "granting access to ring page"); goto fail; } grefs[i] = err; + + vaddr = vaddr + PAGE_SIZE; } return 0; diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 5390a674b5e3..4308fb3cf7c2 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -742,7 +742,7 @@ static int xenbus_resume_cb(struct notifier_block *nb, int err = 0; if (xen_hvm_domain()) { - uint64_t v; + uint64_t v = 0; err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); if (!err && v) diff --git a/fs/block_dev.c b/fs/block_dev.c index b155d32db766..4fe10f93db8a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -43,7 +43,7 @@ static inline struct bdev_inode *BDEV_I(struct inode *inode) return container_of(inode, struct bdev_inode, vfs_inode); } -inline struct block_device *I_BDEV(struct inode *inode) +struct block_device *I_BDEV(struct inode *inode) { return &BDEV_I(inode)->bdev; } diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index df9932b00d08..1ce06c849a86 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -85,6 +85,7 @@ BTRFS_WORK_HELPER(extent_refs_helper); BTRFS_WORK_HELPER(scrub_helper); BTRFS_WORK_HELPER(scrubwrc_helper); BTRFS_WORK_HELPER(scrubnc_helper); +BTRFS_WORK_HELPER(scrubparity_helper); static struct __btrfs_workqueue * __btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active, diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index ec2ee477f8ba..b0b093b6afec 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -64,6 +64,8 @@ BTRFS_WORK_HELPER_PROTO(extent_refs_helper); BTRFS_WORK_HELPER_PROTO(scrub_helper); BTRFS_WORK_HELPER_PROTO(scrubwrc_helper); BTRFS_WORK_HELPER_PROTO(scrubnc_helper); +BTRFS_WORK_HELPER_PROTO(scrubparity_helper); + struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, unsigned int flags, diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 614aaa1969bd..802fabb30e15 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -250,8 +250,12 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, * the first item to check. But sometimes, we may enter it with * slot==nritems. In that case, go to the next leaf before we continue. */ - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) - ret = btrfs_next_old_leaf(root, path, time_seq); + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + if (time_seq == (u64)-1) + ret = btrfs_next_leaf(root, path); + else + ret = btrfs_next_old_leaf(root, path, time_seq); + } while (!ret && count < total_refs) { eb = path->nodes[0]; @@ -291,7 +295,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eie = NULL; } next: - ret = btrfs_next_old_item(root, path, time_seq); + if (time_seq == (u64)-1) + ret = btrfs_next_item(root, path); + else + ret = btrfs_next_old_item(root, path, time_seq); } if (ret > 0) @@ -334,6 +341,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, if (path->search_commit_root) root_level = btrfs_header_level(root->commit_root); + else if (time_seq == (u64)-1) + root_level = btrfs_header_level(root->node); else root_level = btrfs_old_root_level(root, time_seq); @@ -343,7 +352,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, } path->lowest_level = level; - ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); + if (time_seq == (u64)-1) + ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, + 0, 0); + else + ret = btrfs_search_old_slot(root, &ref->key_for_search, path, + time_seq); /* root node has been locked, we can release @subvol_srcu safely here */ srcu_read_unlock(&fs_info->subvol_srcu, index); @@ -491,7 +505,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, BUG_ON(!ref->wanted_disk_byte); eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, 0); - if (!eb || !extent_buffer_uptodate(eb)) { + if (IS_ERR(eb)) { + return PTR_ERR(eb); + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; } @@ -507,7 +523,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, } /* - * merge two lists of backrefs and adjust counts accordingly + * merge backrefs and adjust counts accordingly * * mode = 1: merge identical keys, if key is set * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. @@ -535,9 +551,9 @@ static void __merge_refs(struct list_head *head, int mode) ref2 = list_entry(pos2, struct __prelim_ref, list); + if (!ref_for_same_block(ref1, ref2)) + continue; if (mode == 1) { - if (!ref_for_same_block(ref1, ref2)) - continue; if (!ref1->parent && ref2->parent) { xchg = ref1; ref1 = ref2; @@ -572,8 +588,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, struct list_head *prefs, u64 *total_refs, u64 inum) { + struct btrfs_delayed_ref_node *node; struct btrfs_delayed_extent_op *extent_op = head->extent_op; - struct rb_node *n = &head->node.rb_node; struct btrfs_key key; struct btrfs_key op_key = {0}; int sgn; @@ -583,12 +599,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, btrfs_disk_key_to_cpu(&op_key, &extent_op->key); spin_lock(&head->lock); - n = rb_first(&head->ref_root); - while (n) { - struct btrfs_delayed_ref_node *node; - node = rb_entry(n, struct btrfs_delayed_ref_node, - rb_node); - n = rb_next(n); + list_for_each_entry(node, &head->ref_list, list) { if (node->seq > seq) continue; @@ -882,6 +893,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, * * NOTE: This can return values > 0 * + * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave + * much like trans == NULL case, the difference only lies in it will not + * commit root. + * The special case is for qgroup to search roots in commit_transaction(). + * * FIXME some caching might speed things up */ static int find_parent_nodes(struct btrfs_trans_handle *trans, @@ -920,6 +936,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, path->skip_locking = 1; } + if (time_seq == (u64)-1) + path->skip_locking = 1; + /* * grab both a lock on the path and a lock on the delayed ref head. * We need both to get a consistent picture of how the refs look @@ -934,9 +953,10 @@ again: BUG_ON(ret == 0); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (trans && likely(trans->type != __TRANS_DUMMY)) { + if (trans && likely(trans->type != __TRANS_DUMMY) && + time_seq != (u64)-1) { #else - if (trans) { + if (trans && time_seq != (u64)-1) { #endif /* * look if there are updates for this ref queued and lock the @@ -1034,7 +1054,10 @@ again: eb = read_tree_block(fs_info->extent_root, ref->parent, 0); - if (!eb || !extent_buffer_uptodate(eb)) { + if (IS_ERR(eb)) { + ret = PTR_ERR(eb); + goto out; + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); ret = -EIO; goto out; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0f11ebc92f02..54114b4887dd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1439,8 +1439,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); old = read_tree_block(root, logical, 0); - if (WARN_ON(!old || !extent_buffer_uptodate(old))) { - free_extent_buffer(old); + if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { + if (!IS_ERR(old)) + free_extent_buffer(old); btrfs_warn(root->fs_info, "failed to read tree block %llu from get_old_root", logical); } else { @@ -1685,7 +1686,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (!cur || !uptodate) { if (!cur) { cur = read_tree_block(root, blocknr, gen); - if (!cur || !extent_buffer_uptodate(cur)) { + if (IS_ERR(cur)) { + return PTR_ERR(cur); + } else if (!extent_buffer_uptodate(cur)) { free_extent_buffer(cur); return -EIO; } @@ -1864,8 +1867,9 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), btrfs_node_ptr_generation(parent, slot)); - if (eb && !extent_buffer_uptodate(eb)) { - free_extent_buffer(eb); + if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) { + if (!IS_ERR(eb)) + free_extent_buffer(eb); eb = NULL; } @@ -2494,7 +2498,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, ret = -EAGAIN; tmp = read_tree_block(root, blocknr, 0); - if (tmp) { + if (!IS_ERR(tmp)) { /* * If the read above didn't mark this buffer up to date, * it will never end up being up to date. Set ret to EIO now diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f364e1d8d3d..80a9aefb0c46 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -174,7 +174,7 @@ struct btrfs_ordered_sum; /* csum types */ #define BTRFS_CSUM_TYPE_CRC32 0 -static int btrfs_csum_sizes[] = { 4, 0 }; +static int btrfs_csum_sizes[] = { 4 }; /* four bytes for CRC32 */ #define BTRFS_EMPTY_DIR_SIZE 0 @@ -1619,10 +1619,7 @@ struct btrfs_fs_info { struct task_struct *cleaner_kthread; int thread_pool_size; - struct kobject super_kobj; struct kobject *space_info_kobj; - struct kobject *device_dir_kobj; - struct completion kobj_unregister; int do_barriers; int closing; int log_root_recovering; @@ -1698,6 +1695,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *scrub_workers; struct btrfs_workqueue *scrub_wr_completion_workers; struct btrfs_workqueue *scrub_nocow_workers; + struct btrfs_workqueue *scrub_parity_workers; #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY u32 check_integrity_print_mask; @@ -1735,7 +1733,7 @@ struct btrfs_fs_info { /* list of dirty qgroups to be written at next commit */ struct list_head dirty_qgroups; - /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ + /* used by qgroup for an efficient tree traversal */ u64 qgroup_seq; /* qgroup rescan items */ @@ -3458,6 +3456,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes) void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_release_metadata(struct inode *inode); @@ -3515,6 +3514,9 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, int __get_raid_index(u64 flags); int btrfs_start_write_no_snapshoting(struct btrfs_root *root); void btrfs_end_write_no_snapshoting(struct btrfs_root *root); +void check_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const u64 type); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -4050,6 +4052,7 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) #ifdef CONFIG_BTRFS_ASSERT +__cold static inline void assfail(char *expr, char *file, int line) { pr_err("BTRFS: assertion failed: %s, file: %s, line: %d", @@ -4065,10 +4068,12 @@ static inline void assfail(char *expr, char *file, int line) #define btrfs_assert() __printf(5, 6) +__cold void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); +__cold void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *function, unsigned int line, int errno); @@ -4111,11 +4116,17 @@ static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) * Call btrfs_abort_transaction as early as possible when an error condition is * detected, that way the exact line number is reported. */ - #define btrfs_abort_transaction(trans, root, errno) \ do { \ - __btrfs_abort_transaction(trans, root, __func__, \ - __LINE__, errno); \ + /* Report first abort since mount */ \ + if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ + &((root)->fs_info->fs_state))) { \ + WARN(1, KERN_DEBUG \ + "BTRFS: Transaction aborted (error %d)\n", \ + (errno)); \ + } \ + __btrfs_abort_transaction((trans), (root), __func__, \ + __LINE__, (errno)); \ } while (0) #define btrfs_std_error(fs_info, errno) \ @@ -4132,6 +4143,7 @@ do { \ } while (0) __printf(5, 6) +__cold void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 8f8ed7d20bac..ac3e81da6d4e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -22,6 +22,7 @@ #include "ctree.h" #include "delayed-ref.h" #include "transaction.h" +#include "qgroup.h" struct kmem_cache *btrfs_delayed_ref_head_cachep; struct kmem_cache *btrfs_delayed_tree_ref_cachep; @@ -84,87 +85,6 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, return 0; } -/* - * entries in the rb tree are ordered by the byte number of the extent, - * type of the delayed backrefs and content of delayed backrefs. - */ -static int comp_entry(struct btrfs_delayed_ref_node *ref2, - struct btrfs_delayed_ref_node *ref1, - bool compare_seq) -{ - if (ref1->bytenr < ref2->bytenr) - return -1; - if (ref1->bytenr > ref2->bytenr) - return 1; - if (ref1->is_head && ref2->is_head) - return 0; - if (ref2->is_head) - return -1; - if (ref1->is_head) - return 1; - if (ref1->type < ref2->type) - return -1; - if (ref1->type > ref2->type) - return 1; - if (ref1->no_quota > ref2->no_quota) - return 1; - if (ref1->no_quota < ref2->no_quota) - return -1; - /* merging of sequenced refs is not allowed */ - if (compare_seq) { - if (ref1->seq < ref2->seq) - return -1; - if (ref1->seq > ref2->seq) - return 1; - } - if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || - ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { - return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), - btrfs_delayed_node_to_tree_ref(ref1), - ref1->type); - } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || - ref1->type == BTRFS_SHARED_DATA_REF_KEY) { - return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), - btrfs_delayed_node_to_data_ref(ref1)); - } - BUG(); - return 0; -} - -/* - * insert a new ref into the rbtree. This returns any existing refs - * for the same (bytenr,parent) tuple, or NULL if the new node was properly - * inserted. - */ -static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, - struct rb_node *node) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent_node = NULL; - struct btrfs_delayed_ref_node *entry; - struct btrfs_delayed_ref_node *ins; - int cmp; - - ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - while (*p) { - parent_node = *p; - entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, - rb_node); - - cmp = comp_entry(entry, ins, 1); - if (cmp < 0) - p = &(*p)->rb_left; - else if (cmp > 0) - p = &(*p)->rb_right; - else - return entry; - } - - rb_link_node(node, parent_node, p); - rb_insert_color(node, root); - return NULL; -} - /* insert a new ref to head ref rbtree */ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, struct rb_node *node) @@ -268,7 +188,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, rb_erase(&head->href_node, &delayed_refs->href_root); } else { assert_spin_locked(&head->lock); - rb_erase(&ref->rb_node, &head->ref_root); + list_del(&ref->list); } ref->in_tree = 0; btrfs_put_delayed_ref(ref); @@ -277,99 +197,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, trans->delayed_ref_updates--; } -static int merge_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_delayed_ref_head *head, - struct btrfs_delayed_ref_node *ref, u64 seq) -{ - struct rb_node *node; - int mod = 0; - int done = 0; - - node = rb_next(&ref->rb_node); - while (!done && node) { - struct btrfs_delayed_ref_node *next; - - next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_next(node); - if (seq && next->seq >= seq) - break; - if (comp_entry(ref, next, 0)) - continue; - - if (ref->action == next->action) { - mod = next->ref_mod; - } else { - if (ref->ref_mod < next->ref_mod) { - struct btrfs_delayed_ref_node *tmp; - - tmp = ref; - ref = next; - next = tmp; - done = 1; - } - mod = -next->ref_mod; - } - - drop_delayed_ref(trans, delayed_refs, head, next); - ref->ref_mod += mod; - if (ref->ref_mod == 0) { - drop_delayed_ref(trans, delayed_refs, head, ref); - done = 1; - } else { - /* - * You can't have multiples of the same ref on a tree - * block. - */ - WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || - ref->type == BTRFS_SHARED_BLOCK_REF_KEY); - } - } - return done; -} - -void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_delayed_ref_head *head) -{ - struct rb_node *node; - u64 seq = 0; - - assert_spin_locked(&head->lock); - /* - * We don't have too much refs to merge in the case of delayed data - * refs. - */ - if (head->is_data) - return; - - spin_lock(&fs_info->tree_mod_seq_lock); - if (!list_empty(&fs_info->tree_mod_seq_list)) { - struct seq_list *elem; - - elem = list_first_entry(&fs_info->tree_mod_seq_list, - struct seq_list, list); - seq = elem->seq; - } - spin_unlock(&fs_info->tree_mod_seq_lock); - - node = rb_first(&head->ref_root); - while (node) { - struct btrfs_delayed_ref_node *ref; - - ref = rb_entry(node, struct btrfs_delayed_ref_node, - rb_node); - /* We can't merge refs that are outside of our seq count */ - if (seq && ref->seq >= seq) - break; - if (merge_ref(trans, delayed_refs, head, ref, seq)) - node = rb_first(&head->ref_root); - else - node = rb_next(&ref->rb_node); - } -} - int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq) @@ -443,45 +270,71 @@ again: } /* - * helper function to update an extent delayed ref in the - * rbtree. existing and update must both have the same - * bytenr and parent + * Helper to insert the ref_node to the tail or merge with tail. * - * This may free existing if the update cancels out whatever - * operation it was doing. + * Return 0 for insert. + * Return >0 for merge. */ -static noinline void -update_existing_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_delayed_ref_head *head, - struct btrfs_delayed_ref_node *existing, - struct btrfs_delayed_ref_node *update) +static int +add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *root, + struct btrfs_delayed_ref_head *href, + struct btrfs_delayed_ref_node *ref) { - if (update->action != existing->action) { - /* - * this is effectively undoing either an add or a - * drop. We decrement the ref_mod, and if it goes - * down to zero we just delete the entry without - * every changing the extent allocation tree. - */ - existing->ref_mod--; - if (existing->ref_mod == 0) - drop_delayed_ref(trans, delayed_refs, head, existing); - else - WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || - existing->type == BTRFS_SHARED_BLOCK_REF_KEY); + struct btrfs_delayed_ref_node *exist; + int mod; + int ret = 0; + + spin_lock(&href->lock); + /* Check whether we can merge the tail node with ref */ + if (list_empty(&href->ref_list)) + goto add_tail; + exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node, + list); + /* No need to compare bytenr nor is_head */ + if (exist->type != ref->type || exist->no_quota != ref->no_quota || + exist->seq != ref->seq) + goto add_tail; + + if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY || + exist->type == BTRFS_SHARED_BLOCK_REF_KEY) && + comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist), + btrfs_delayed_node_to_tree_ref(ref), + ref->type)) + goto add_tail; + if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY || + exist->type == BTRFS_SHARED_DATA_REF_KEY) && + comp_data_refs(btrfs_delayed_node_to_data_ref(exist), + btrfs_delayed_node_to_data_ref(ref))) + goto add_tail; + + /* Now we are sure we can merge */ + ret = 1; + if (exist->action == ref->action) { + mod = ref->ref_mod; } else { - WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || - existing->type == BTRFS_SHARED_BLOCK_REF_KEY); - /* - * the action on the existing ref matches - * the action on the ref we're trying to add. - * Bump the ref_mod by one so the backref that - * is eventually added/removed has the correct - * reference count - */ - existing->ref_mod += update->ref_mod; + /* Need to change action */ + if (exist->ref_mod < ref->ref_mod) { + exist->action = ref->action; + mod = -exist->ref_mod; + exist->ref_mod = ref->ref_mod; + } else + mod = -ref->ref_mod; } + exist->ref_mod += mod; + + /* remove existing tail if its ref_mod is zero */ + if (exist->ref_mod == 0) + drop_delayed_ref(trans, root, href, exist); + spin_unlock(&href->lock); + return ret; + +add_tail: + list_add_tail(&ref->list, &href->ref_list); + atomic_inc(&root->num_entries); + trans->delayed_ref_updates++; + spin_unlock(&href->lock); + return ret; } /* @@ -568,12 +421,14 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, static noinline struct btrfs_delayed_ref_head * add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *ref, u64 bytenr, - u64 num_bytes, int action, int is_data) + struct btrfs_delayed_ref_node *ref, + struct btrfs_qgroup_extent_record *qrecord, + u64 bytenr, u64 num_bytes, int action, int is_data) { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_qgroup_extent_record *qexisting; int count_mod = 1; int must_insert_reserved = 0; @@ -618,10 +473,22 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref = btrfs_delayed_node_to_head(ref); head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; - head_ref->ref_root = RB_ROOT; + INIT_LIST_HEAD(&head_ref->ref_list); head_ref->processing = 0; head_ref->total_ref_mod = count_mod; + /* Record qgroup extent info if provided */ + if (qrecord) { + qrecord->bytenr = bytenr; + qrecord->num_bytes = num_bytes; + qrecord->old_roots = NULL; + + qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs, + qrecord); + if (qexisting) + kfree(qrecord); + } + spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); @@ -659,10 +526,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, int no_quota) { - struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_tree_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; u64 seq = 0; + int ret; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; @@ -693,21 +560,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, trace_add_delayed_tree_ref(ref, full_ref, action); - spin_lock(&head_ref->lock); - existing = tree_insert(&head_ref->ref_root, &ref->rb_node); - if (existing) { - update_existing_ref(trans, delayed_refs, head_ref, existing, - ref); - /* - * we've updated the existing ref, free the newly - * allocated ref - */ + ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); + + /* + * XXX: memory should be freed at the same level allocated. + * But bad practice is anywhere... Follow it now. Need cleanup. + */ + if (ret > 0) kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); - } else { - atomic_inc(&delayed_refs->num_entries); - trans->delayed_ref_updates++; - } - spin_unlock(&head_ref->lock); } /* @@ -721,10 +581,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, int no_quota) { - struct btrfs_delayed_ref_node *existing; struct btrfs_delayed_data_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; u64 seq = 0; + int ret; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; @@ -758,21 +618,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, trace_add_delayed_data_ref(ref, full_ref, action); - spin_lock(&head_ref->lock); - existing = tree_insert(&head_ref->ref_root, &ref->rb_node); - if (existing) { - update_existing_ref(trans, delayed_refs, head_ref, existing, - ref); - /* - * we've updated the existing ref, free the newly - * allocated ref - */ + ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); + + if (ret > 0) kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); - } else { - atomic_inc(&delayed_refs->num_entries); - trans->delayed_ref_updates++; - } - spin_unlock(&head_ref->lock); } /* @@ -790,6 +639,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_qgroup_extent_record *record = NULL; if (!is_fstree(ref_root) || !fs_info->quota_enabled) no_quota = 0; @@ -800,9 +650,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, return -ENOMEM; head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); - if (!head_ref) { - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); - return -ENOMEM; + if (!head_ref) + goto free_ref; + + if (fs_info->quota_enabled && is_fstree(ref_root)) { + record = kmalloc(sizeof(*record), GFP_NOFS); + if (!record) + goto free_head_ref; } head_ref->extent_op = extent_op; @@ -814,7 +668,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, + head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, bytenr, num_bytes, action, 0); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, @@ -823,6 +677,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, spin_unlock(&delayed_refs->lock); return 0; + +free_head_ref: + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); +free_ref: + kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); + + return -ENOMEM; } /* @@ -839,6 +700,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_qgroup_extent_record *record = NULL; if (!is_fstree(ref_root) || !fs_info->quota_enabled) no_quota = 0; @@ -854,6 +716,16 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, return -ENOMEM; } + if (fs_info->quota_enabled && is_fstree(ref_root)) { + record = kmalloc(sizeof(*record), GFP_NOFS); + if (!record) { + kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); + kmem_cache_free(btrfs_delayed_ref_head_cachep, + head_ref); + return -ENOMEM; + } + } + head_ref->extent_op = extent_op; delayed_refs = &trans->transaction->delayed_refs; @@ -863,7 +735,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, * insert both the head node and the new ref without dropping * the spin lock */ - head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, + head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, bytenr, num_bytes, action, 1); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, @@ -891,9 +763,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, - num_bytes, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data); + add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr, + num_bytes, BTRFS_UPDATE_DELAYED_HEAD, + extent_op->is_data); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 5eb0892396d0..13fb5e6090fe 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -24,9 +24,25 @@ #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ +/* + * XXX: Qu: I really hate the design that ref_head and tree/data ref shares the + * same ref_node structure. + * Ref_head is in a higher logic level than tree/data ref, and duplicated + * bytenr/num_bytes in ref_node is really a waste or memory, they should be + * referred from ref_head. + * This gets more disgusting after we use list to store tree/data ref in + * ref_head. Must clean this mess up later. + */ struct btrfs_delayed_ref_node { + /* + * ref_head use rb tree, stored in ref_root->href. + * indexed by bytenr + */ struct rb_node rb_node; + /*data/tree ref use list, stored in ref_head->ref_list. */ + struct list_head list; + /* the starting bytenr of the extent */ u64 bytenr; @@ -83,7 +99,7 @@ struct btrfs_delayed_ref_head { struct mutex mutex; spinlock_t lock; - struct rb_root ref_root; + struct list_head ref_list; struct rb_node href_node; @@ -132,6 +148,9 @@ struct btrfs_delayed_ref_root { /* head ref rbtree */ struct rb_root href_root; + /* dirty extent records */ + struct rb_root dirty_extent_root; + /* this spin lock protects the rbtree and the entries inside */ spinlock_t lock; @@ -156,6 +175,14 @@ struct btrfs_delayed_ref_root { int flushing; u64 run_delayed_start; + + /* + * To make qgroup to skip given root. + * This is for snapshot, as btrfs_qgroup_inherit() will manully + * modify counters for snapshot and its source, so we should skip + * the snapshot in new_root/old_roots or it will get calculated twice + */ + u64 qgroup_to_skip; }; extern struct kmem_cache *btrfs_delayed_ref_head_cachep; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 0573848c7333..862fbc206755 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -376,6 +376,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root, WARN_ON(!tgt_device); dev_replace->tgtdev = tgt_device; + ret = btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device); + if (ret) + btrfs_error(root->fs_info, ret, "kobj add dev failed"); + printk_in_rcu(KERN_INFO "BTRFS: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : @@ -583,8 +587,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, mutex_unlock(&uuid_mutex); /* replace the sysfs entry */ - btrfs_kobj_rm_device(fs_info, src_device); - btrfs_kobj_add_device(fs_info, tgt_device); + btrfs_kobj_rm_device(fs_info->fs_devices, src_device); btrfs_rm_dev_replace_free_srcdev(fs_info, src_device); /* write back the superblocks */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0bccf18dc1dc..3f43bfea3684 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1149,12 +1149,12 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr); if (!buf) - return NULL; + return ERR_PTR(-ENOMEM); ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { free_extent_buffer(buf); - return NULL; + return ERR_PTR(ret); } return buf; @@ -1509,20 +1509,19 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, generation = btrfs_root_generation(&root->root_item); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), generation); - if (!root->node) { - ret = -ENOMEM; + if (IS_ERR(root->node)) { + ret = PTR_ERR(root->node); goto find_fail; } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { ret = -EIO; - goto read_fail; + free_extent_buffer(root->node); + goto find_fail; } root->commit_root = btrfs_root_node(root); out: btrfs_free_path(path); return root; -read_fail: - free_extent_buffer(root->node); find_fail: kfree(root); alloc_fail: @@ -2320,8 +2319,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, log_tree_root->node = read_tree_block(tree_root, bytenr, fs_info->generation + 1); - if (!log_tree_root->node || - !extent_buffer_uptodate(log_tree_root->node)) { + if (IS_ERR(log_tree_root->node)) { + printk(KERN_ERR "BTRFS: failed to read log tree\n"); + ret = PTR_ERR(log_tree_root->node); + kfree(log_tree_root); + return ret; + } else if (!extent_buffer_uptodate(log_tree_root->node)) { printk(KERN_ERR "BTRFS: failed to read log tree\n"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); @@ -2494,7 +2497,6 @@ int open_ctree(struct super_block *sb, seqlock_init(&fs_info->profiles_lock); init_rwsem(&fs_info->delayed_iput_sem); - init_completion(&fs_info->kobj_unregister); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); @@ -2797,8 +2799,8 @@ int open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), generation); - if (!chunk_root->node || - !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { + if (IS_ERR(chunk_root->node) || + !extent_buffer_uptodate(chunk_root->node)) { printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", sb->s_id); goto fail_tree_roots; @@ -2834,8 +2836,8 @@ retry_root_backup: tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), generation); - if (!tree_root->node || - !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { + if (IS_ERR(tree_root->node) || + !extent_buffer_uptodate(tree_root->node)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", sb->s_id); @@ -2874,10 +2876,22 @@ retry_root_backup: btrfs_close_extra_devices(fs_devices, 1); + ret = btrfs_sysfs_add_fsid(fs_devices, NULL); + if (ret) { + pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret); + goto fail_block_groups; + } + + ret = btrfs_sysfs_add_device(fs_devices); + if (ret) { + pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret); + goto fail_fsdev_sysfs; + } + ret = btrfs_sysfs_add_one(fs_info); if (ret) { pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); - goto fail_block_groups; + goto fail_fsdev_sysfs; } ret = btrfs_init_space_info(fs_info); @@ -3055,6 +3069,9 @@ fail_cleaner: fail_sysfs: btrfs_sysfs_remove_one(fs_info); +fail_fsdev_sysfs: + btrfs_sysfs_remove_fsid(fs_info->fs_devices); + fail_block_groups: btrfs_put_block_group_cache(fs_info); btrfs_free_block_groups(fs_info); @@ -3725,6 +3742,7 @@ void close_ctree(struct btrfs_root *root) } btrfs_sysfs_remove_one(fs_info); + btrfs_sysfs_remove_fsid(fs_info->fs_devices); btrfs_free_fs_roots(fs_info); @@ -4053,6 +4071,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, while ((node = rb_first(&delayed_refs->href_root)) != NULL) { struct btrfs_delayed_ref_head *head; + struct btrfs_delayed_ref_node *tmp; bool pin_bytes = false; head = rb_entry(node, struct btrfs_delayed_ref_head, @@ -4068,11 +4087,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, continue; } spin_lock(&head->lock); - while ((node = rb_first(&head->ref_root)) != NULL) { - ref = rb_entry(node, struct btrfs_delayed_ref_node, - rb_node); + list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list, + list) { ref->in_tree = 0; - rb_erase(&ref->rb_node, &head->ref_root); + list_del(&ref->list); atomic_dec(&delayed_refs->num_entries); btrfs_put_delayed_ref(ref); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0ec3acd14cbf..38b76cc02f48 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -79,11 +79,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, u64 num_bytes, int alloc); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, + struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, u64 owner_objectid, u64 owner_offset, int refs_to_drop, - struct btrfs_delayed_extent_op *extra_op, - int no_quota); + struct btrfs_delayed_extent_op *extra_op); static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, struct extent_buffer *leaf, struct btrfs_extent_item *ei); @@ -1967,10 +1966,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, + struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, u64 owner, u64 offset, int refs_to_add, - int no_quota, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -1978,9 +1976,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_extent_item *item; struct btrfs_key key; + u64 bytenr = node->bytenr; + u64 num_bytes = node->num_bytes; u64 refs; int ret; - enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL; + int no_quota = node->no_quota; path = btrfs_alloc_path(); if (!path) @@ -1996,26 +1996,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, bytenr, num_bytes, parent, root_objectid, owner, offset, refs_to_add, extent_op); - if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota)) + if ((ret < 0 && ret != -EAGAIN) || !ret) goto out; - /* - * Ok we were able to insert an inline extent and it appears to be a new - * reference, deal with the qgroup accounting. - */ - if (!ret && !no_quota) { - ASSERT(root->fs_info->quota_enabled); - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item); - if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add) - type = BTRFS_QGROUP_OPER_ADD_SHARED; - btrfs_release_path(path); - - ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, - bytenr, num_bytes, type, 0); - goto out; - } /* * Ok we had -EAGAIN which means we didn't have space to insert and @@ -2026,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, item); - if (refs) - type = BTRFS_QGROUP_OPER_ADD_SHARED; btrfs_set_extent_refs(leaf, item, refs + refs_to_add); if (extent_op) __run_delayed_extent_op(extent_op, leaf, item); @@ -2035,13 +2015,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - if (!no_quota) { - ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, - bytenr, num_bytes, type, 0); - if (ret) - goto out; - } - path->reada = 1; path->leave_spinning = 1; /* now insert the actual backref */ @@ -2087,17 +2060,15 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ref->objectid, ref->offset, &ins, node->ref_mod); } else if (node->action == BTRFS_ADD_DELAYED_REF) { - ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, - node->num_bytes, parent, + ret = __btrfs_inc_extent_ref(trans, root, node, parent, ref_root, ref->objectid, ref->offset, node->ref_mod, - node->no_quota, extent_op); + extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { - ret = __btrfs_free_extent(trans, root, node->bytenr, - node->num_bytes, parent, + ret = __btrfs_free_extent(trans, root, node, parent, ref_root, ref->objectid, ref->offset, node->ref_mod, - extent_op, node->no_quota); + extent_op); } else { BUG(); } @@ -2255,15 +2226,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, ref->level, &ins, node->no_quota); } else if (node->action == BTRFS_ADD_DELAYED_REF) { - ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, - node->num_bytes, parent, ref_root, - ref->level, 0, 1, node->no_quota, + ret = __btrfs_inc_extent_ref(trans, root, node, + parent, ref_root, + ref->level, 0, 1, extent_op); } else if (node->action == BTRFS_DROP_DELAYED_REF) { - ret = __btrfs_free_extent(trans, root, node->bytenr, - node->num_bytes, parent, ref_root, - ref->level, 0, 1, extent_op, - node->no_quota); + ret = __btrfs_free_extent(trans, root, node, + parent, ref_root, + ref->level, 0, 1, extent_op); } else { BUG(); } @@ -2323,28 +2293,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, return ret; } -static noinline struct btrfs_delayed_ref_node * +static inline struct btrfs_delayed_ref_node * select_delayed_ref(struct btrfs_delayed_ref_head *head) { - struct rb_node *node; - struct btrfs_delayed_ref_node *ref, *last = NULL;; + if (list_empty(&head->ref_list)) + return NULL; - /* - * select delayed ref of type BTRFS_ADD_DELAYED_REF first. - * this prevents ref count from going down to zero when - * there still are pending delayed ref. - */ - node = rb_first(&head->ref_root); - while (node) { - ref = rb_entry(node, struct btrfs_delayed_ref_node, - rb_node); - if (ref->action == BTRFS_ADD_DELAYED_REF) - return ref; - else if (last == NULL) - last = ref; - node = rb_next(node); - } - return last; + return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node, + list); } /* @@ -2396,16 +2352,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } } - /* - * We need to try and merge add/drops of the same ref since we - * can run into issues with relocate dropping the implicit ref - * and then it being added back again before the drop can - * finish. If we merged anything we need to re-loop so we can - * get a good ref. - */ spin_lock(&locked_ref->lock); - btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, - locked_ref); /* * locked_ref is the head node, so we have to go one @@ -2482,7 +2429,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, spin_unlock(&locked_ref->lock); spin_lock(&delayed_refs->lock); spin_lock(&locked_ref->lock); - if (rb_first(&locked_ref->ref_root) || + if (!list_empty(&locked_ref->ref_list) || locked_ref->extent_op) { spin_unlock(&locked_ref->lock); spin_unlock(&delayed_refs->lock); @@ -2496,7 +2443,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } else { actual_count++; ref->in_tree = 0; - rb_erase(&ref->rb_node, &locked_ref->ref_root); + list_del(&ref->list); } atomic_dec(&delayed_refs->num_entries); @@ -2864,9 +2811,6 @@ again: goto again; } out: - ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info); - if (ret) - return ret; assert_qgroups_uptodate(trans); return 0; } @@ -2905,7 +2849,6 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref; struct btrfs_delayed_data_ref *data_ref; struct btrfs_delayed_ref_root *delayed_refs; - struct rb_node *node; int ret = 0; delayed_refs = &trans->transaction->delayed_refs; @@ -2934,11 +2877,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, spin_unlock(&delayed_refs->lock); spin_lock(&head->lock); - node = rb_first(&head->ref_root); - while (node) { - ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_next(node); - + list_for_each_entry(ref, &head->ref_list, list) { /* If it's a shared ref we know a cross reference exists */ if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { ret = 1; @@ -3693,7 +3632,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->disk_total += total_bytes * factor; found->bytes_used += bytes_used; found->disk_used += bytes_used * factor; - found->full = 0; + if (total_bytes > 0) + found->full = 0; spin_unlock(&found->lock); *space_info = found; return 0; @@ -3721,7 +3661,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_reserved = 0; found->bytes_readonly = 0; found->bytes_may_use = 0; - found->full = 0; + if (total_bytes > 0) + found->full = 0; + else + found->full = 1; found->force_alloc = CHUNK_ALLOC_NO_FORCE; found->chunk_alloc = 0; found->flush = 0; @@ -3975,6 +3918,9 @@ commit_trans: !atomic_read(&root->fs_info->open_ioctl_trans)) { need_commit--; + if (need_commit > 0) + btrfs_wait_ordered_roots(fs_info, -1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -4088,7 +4034,7 @@ static int should_alloc_chunk(struct btrfs_root *root, return 1; } -static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type) +static u64 get_profile_num_devs(struct btrfs_root *root, u64 type) { u64 num_dev; @@ -4102,24 +4048,43 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type) else num_dev = 1; /* DUP or single */ - /* metadata for updaing devices and chunk tree */ - return btrfs_calc_trans_metadata_size(root, num_dev + 1); + return num_dev; } -static void check_system_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 type) +/* + * If @is_allocation is true, reserve space in the system space info necessary + * for allocating a chunk, otherwise if it's false, reserve space necessary for + * removing a chunk. + */ +void check_system_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 type) { struct btrfs_space_info *info; u64 left; u64 thresh; + int ret = 0; + u64 num_devs; + + /* + * Needed because we can end up allocating a system chunk and for an + * atomic and race free space reservation in the chunk block reserve. + */ + ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex)); info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM); spin_lock(&info->lock); left = info->total_bytes - info->bytes_used - info->bytes_pinned - - info->bytes_reserved - info->bytes_readonly; + info->bytes_reserved - info->bytes_readonly - + info->bytes_may_use; spin_unlock(&info->lock); - thresh = get_system_chunk_thresh(root, type); + num_devs = get_profile_num_devs(root, type); + + /* num_devs device items to update and 1 chunk item to add or remove */ + thresh = btrfs_calc_trunc_metadata_size(root, num_devs) + + btrfs_calc_trans_metadata_size(root, 1); + if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) { btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu", left, thresh, type); @@ -4130,7 +4095,21 @@ static void check_system_chunk(struct btrfs_trans_handle *trans, u64 flags; flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0); - btrfs_alloc_chunk(trans, root, flags); + /* + * Ignore failure to create system chunk. We might end up not + * needing it, as we might not need to COW all nodes/leafs from + * the paths we visit in the chunk tree (they were already COWed + * or created in the current transaction for example). + */ + ret = btrfs_alloc_chunk(trans, root, flags); + } + + if (!ret) { + ret = btrfs_block_rsv_add(root->fs_info->chunk_root, + &root->fs_info->chunk_block_rsv, + thresh, BTRFS_RESERVE_NO_FLUSH); + if (!ret) + trans->chunk_bytes_reserved += thresh; } } @@ -5188,6 +5167,24 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, trans->bytes_reserved = 0; } +/* + * To be called after all the new block groups attached to the transaction + * handle have been created (btrfs_create_pending_block_groups()). + */ +void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->root->fs_info; + + if (!trans->chunk_bytes_reserved) + return; + + WARN_ON_ONCE(!list_empty(&trans->new_bgs)); + + block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, + trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved = 0; +} + /* Can only return 0 or -ENOSPC */ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct inode *inode) @@ -6092,11 +6089,10 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes, static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, + struct btrfs_delayed_ref_node *node, u64 parent, u64 root_objectid, u64 owner_objectid, u64 owner_offset, int refs_to_drop, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_key key; struct btrfs_path *path; @@ -6110,10 +6106,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, int extent_slot = 0; int found_extent = 0; int num_to_del = 1; + int no_quota = node->no_quota; u32 item_size; u64 refs; + u64 bytenr = node->bytenr; + u64 num_bytes = node->num_bytes; int last_ref = 0; - enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL; bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); @@ -6294,7 +6292,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, refs -= refs_to_drop; if (refs > 0) { - type = BTRFS_QGROUP_OPER_SUB_SHARED; if (extent_op) __run_delayed_extent_op(extent_op, leaf, ei); /* @@ -6356,18 +6353,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - /* Deal with the quota accounting */ - if (!ret && last_ref && !no_quota) { - int mod_seq = 0; - - if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID && - type == BTRFS_QGROUP_OPER_SUB_SHARED) - mod_seq = 1; - - ret = btrfs_qgroup_record_ref(trans, info, root_objectid, - bytenr, num_bytes, type, - mod_seq); - } out: btrfs_free_path(path); return ret; @@ -6393,7 +6378,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, goto out_delayed_unlock; spin_lock(&head->lock); - if (rb_first(&head->ref_root)) + if (!list_empty(&head->ref_list)) goto out; if (head->extent_op) { @@ -7303,13 +7288,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - /* Always set parent to 0 here since its exclusive anyway. */ - ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, - ins->objectid, ins->offset, - BTRFS_QGROUP_OPER_ADD_EXCL, 0); - if (ret) - return ret; - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", @@ -7391,14 +7369,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); - if (!no_quota) { - ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid, - ins->objectid, num_bytes, - BTRFS_QGROUP_OPER_ADD_EXCL, 0); - if (ret) - return ret; - } - ret = update_block_group(trans, root, ins->objectid, root->nodesize, 1); if (ret) { /* -ENOENT, logic error */ @@ -7755,12 +7725,18 @@ reada: wc->reada_slot = slot; } +/* + * TODO: Modify related function to add related node/leaf to dirty_extent_root, + * for later qgroup accounting. + * + * Current, this function does nothing. + */ static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int nr = btrfs_header_nritems(eb); - int i, extent_type, ret; + int i, extent_type; struct btrfs_key key; struct btrfs_file_extent_item *fi; u64 bytenr, num_bytes; @@ -7783,13 +7759,6 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, continue; num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - - ret = btrfs_qgroup_record_ref(trans, root->fs_info, - root->objectid, - bytenr, num_bytes, - BTRFS_QGROUP_OPER_SUB_SUBTREE, 0); - if (ret) - return ret; } return 0; } @@ -7858,6 +7827,8 @@ static int adjust_slots_upwards(struct btrfs_root *root, /* * root_eb is the subtree root and is locked before this function is called. + * TODO: Modify this function to mark all (including complete shared node) + * to dirty_extent_root to allow it get accounted in qgroup. */ static int account_shared_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -7920,7 +7891,11 @@ walk_down: child_gen = btrfs_node_ptr_generation(eb, parent_slot); eb = read_tree_block(root, child_bytenr, child_gen); - if (!eb || !extent_buffer_uptodate(eb)) { + if (IS_ERR(eb)) { + ret = PTR_ERR(eb); + goto out; + } else if (!extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); ret = -EIO; goto out; } @@ -7931,16 +7906,6 @@ walk_down: btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; - - ret = btrfs_qgroup_record_ref(trans, root->fs_info, - root->objectid, - child_bytenr, - root->nodesize, - BTRFS_QGROUP_OPER_SUB_SUBTREE, - 0); - if (ret) - goto out; - } if (level == 0) { @@ -8151,7 +8116,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (reada && level == 1) reada_walk_down(trans, root, wc, path); next = read_tree_block(root, bytenr, generation); - if (!next || !extent_buffer_uptodate(next)) { + if (IS_ERR(next)) { + return PTR_ERR(next); + } else if (!extent_buffer_uptodate(next)) { free_extent_buffer(next); return -EIO; } @@ -8533,24 +8500,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, goto out_end_trans; } - /* - * Qgroup update accounting is run from - * delayed ref handling. This usually works - * out because delayed refs are normally the - * only way qgroup updates are added. However, - * we may have added updates during our tree - * walk so run qgroups here to make sure we - * don't lose any updates. - */ - ret = btrfs_delayed_qgroup_accounting(trans, - root->fs_info); - if (ret) - printk_ratelimited(KERN_ERR "BTRFS: Failure %d " - "running qgroup updates " - "during snapshot delete. " - "Quota is out of sync, " - "rescan required.\n", ret); - btrfs_end_transaction_throttle(trans, tree_root); if (!for_reloc && btrfs_need_cleaner_sleep(root)) { pr_debug("BTRFS: drop snapshot early exit\n"); @@ -8604,14 +8553,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } root_dropped = true; out_end_trans: - ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info); - if (ret) - printk_ratelimited(KERN_ERR "BTRFS: Failure %d " - "running qgroup updates " - "during snapshot delete. " - "Quota is out of sync, " - "rescan required.\n", ret); - btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); @@ -9562,6 +9503,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, free_excluded_extents(root, cache); + /* + * Call to ensure the corresponding space_info object is created and + * assigned to our block group, but don't update its counters just yet. + * We want our bg to be added to the rbtree with its ->space_info set. + */ + ret = update_space_info(root->fs_info, cache->flags, 0, 0, + &cache->space_info); + if (ret) { + btrfs_remove_free_space_cache(cache); + btrfs_put_block_group(cache); + return ret; + } + ret = btrfs_add_block_group_cache(root->fs_info, cache); if (ret) { btrfs_remove_free_space_cache(cache); @@ -9569,6 +9523,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, return ret; } + /* + * Now that our block group has its ->space_info set and is inserted in + * the rbtree, update the space info's counters. + */ ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); if (ret) { diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/fs/btrfs/extent-tree.h diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c374e1e71e5f..02d05817cbdf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1277,7 +1277,12 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, gfp_t mask) { - return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); + int wake = 0; + + if (bits & EXTENT_LOCKED) + wake = 1; + + return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask); } int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, @@ -4490,6 +4495,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) flags |= FIEMAP_EXTENT_ENCODED; + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + flags |= FIEMAP_EXTENT_UNWRITTEN; free_extent_map(em); em = NULL; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b072e17479aa..795d754327a7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1868,6 +1868,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_log_ctx ctx; int ret = 0; bool full_sync = 0; + const u64 len = end - start + 1; trace_btrfs_sync_file(file, datasync); @@ -1896,7 +1897,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * all extents are persisted and the respective file extent * items are in the fs/subvol btree. */ - ret = btrfs_wait_ordered_range(inode, start, end - start + 1); + ret = btrfs_wait_ordered_range(inode, start, len); } else { /* * Start any new ordered operations before starting to log the @@ -1968,8 +1969,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ smp_mb(); if (btrfs_inode_in_log(inode, root->fs_info->generation) || - (full_sync && BTRFS_I(inode)->last_trans <= - root->fs_info->last_trans_committed)) { + (BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed && + (full_sync || + !btrfs_have_ordered_extents_in_range(inode, start, len)))) { /* * We'v had everything committed since the last time we were * modified so clear this flag in case it was set for whatever diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9dbe5b548fa6..fb5a6b1c62a6 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -231,6 +231,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, { int ret = 0; struct btrfs_path *path = btrfs_alloc_path(); + bool locked = false; if (!path) { ret = -ENOMEM; @@ -238,6 +239,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, } if (block_group) { + locked = true; mutex_lock(&trans->transaction->cache_write_mutex); if (!list_empty(&block_group->io_list)) { list_del_init(&block_group->io_list); @@ -269,18 +271,14 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, */ ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); - if (ret) { - mutex_unlock(&trans->transaction->cache_write_mutex); - btrfs_abort_transaction(trans, root, ret); - return ret; - } + if (ret) + goto fail; ret = btrfs_update_inode(trans, root, inode); - if (block_group) - mutex_unlock(&trans->transaction->cache_write_mutex); - fail: + if (locked) + mutex_unlock(&trans->transaction->cache_write_mutex); if (ret) btrfs_abort_transaction(trans, root, ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8bb013672aee..855935f6671a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4986,24 +4986,40 @@ static void evict_inode_truncate_pages(struct inode *inode) } write_unlock(&map_tree->lock); + /* + * Keep looping until we have no more ranges in the io tree. + * We can have ongoing bios started by readpages (called from readahead) + * that didn't get their end io callbacks called yet or they are still + * in progress ((extent_io.c:end_bio_extent_readpage()). This means some + * ranges can still be locked and eviction started because before + * submitting those bios, which are executed by a separate task (work + * queue kthread), inode references (inode->i_count) were not taken + * (which would be dropped in the end io callback of each bio). + * Therefore here we effectively end up waiting for those bios and + * anyone else holding locked ranges without having bumped the inode's + * reference count - if we don't do it, when they access the inode's + * io_tree to unlock a range it may be too late, leading to an + * use-after-free issue. + */ spin_lock(&io_tree->lock); while (!RB_EMPTY_ROOT(&io_tree->state)) { struct extent_state *state; struct extent_state *cached_state = NULL; + u64 start; + u64 end; node = rb_first(&io_tree->state); state = rb_entry(node, struct extent_state, rb_node); - atomic_inc(&state->refs); + start = state->start; + end = state->end; spin_unlock(&io_tree->lock); - lock_extent_bits(io_tree, state->start, state->end, - 0, &cached_state); - clear_extent_bit(io_tree, state->start, state->end, + lock_extent_bits(io_tree, start, end, 0, &cached_state); + clear_extent_bit(io_tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, &cached_state, GFP_NOFS); - free_extent_state(state); cond_resched(); spin_lock(&io_tree->lock); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1c22c6518504..c86b835da7a8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -553,8 +553,8 @@ static noinline int create_subvol(struct inode *dir, key.offset = (u64)-1; new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); if (IS_ERR(new_root)) { - btrfs_abort_transaction(trans, root, PTR_ERR(new_root)); ret = PTR_ERR(new_root); + btrfs_abort_transaction(trans, root, ret); goto fail; } @@ -1318,7 +1318,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index + 1; + max_to_defrag = last_index - i + 1; /* * make writeback starts from i, so the defrag range can be @@ -1368,7 +1368,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra_index = max(i, ra_index); btrfs_force_ra(inode->i_mapping, ra, file, ra_index, cluster); - ra_index += max_cluster; + ra_index += cluster; } mutex_lock(&inode->i_mutex); @@ -2271,10 +2271,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, { struct btrfs_ioctl_ino_lookup_args *args; struct inode *inode; - int ret; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + int ret = 0; args = memdup_user(argp, sizeof(*args)); if (IS_ERR(args)) @@ -2282,13 +2279,28 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, inode = file_inode(file); + /* + * Unprivileged query to obtain the containing subvolume root id. The + * path is reset so it's consistent with btrfs_search_path_in_tree. + */ if (args->treeid == 0) args->treeid = BTRFS_I(inode)->root->root_key.objectid; + if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) { + args->name[0] = 0; + goto out; + } + + if (!capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto out; + } + ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, args->treeid, args->objectid, args->name); +out: if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) ret = -EFAULT; @@ -2413,8 +2425,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_unlock_inode; } - d_invalidate(dentry); - down_write(&root->fs_info->subvol_sem); err = may_destroy_subvol(dest); @@ -2508,7 +2518,7 @@ out_up_write: out_unlock_inode: mutex_unlock(&inode->i_mutex); if (!err) { - shrink_dcache_sb(root->fs_info->sb); + d_invalidate(dentry); btrfs_invalidate_inodes(dest); d_delete(dentry); ASSERT(dest->send_in_progress == 0); @@ -2879,12 +2889,19 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, return ret; } -static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) +static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen, + u64 olen) { + u64 len = *plen; u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; - if (off + len > inode->i_size || off + len < off) + if (off + olen > inode->i_size || off + olen < off) return -EINVAL; + + /* if we extend to eof, continue to block boundary */ + if (off + len == inode->i_size) + *plen = len = ALIGN(inode->i_size, bs) - off; + /* Check that we are block aligned - btrfs_clone() requires this */ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) return -EINVAL; @@ -2892,10 +2909,11 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) return 0; } -static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, +static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, struct inode *dst, u64 dst_loff) { int ret; + u64 len = olen; /* * btrfs_clone() can't handle extents in the same file @@ -2910,11 +2928,11 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, btrfs_double_lock(src, loff, dst, dst_loff, len); - ret = extent_same_check_offsets(src, loff, len); + ret = extent_same_check_offsets(src, loff, &len, olen); if (ret) goto out_unlock; - ret = extent_same_check_offsets(dst, dst_loff, len); + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); if (ret) goto out_unlock; @@ -2927,7 +2945,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); if (ret == 0) - ret = btrfs_clone(src, dst, loff, len, len, dst_loff); + ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); out_unlock: btrfs_double_unlock(src, loff, dst, dst_loff, len); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 760c4a5e096b..89656d799ff6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -198,9 +198,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) && - !(type == BTRFS_ORDERED_NOCOW)) - entry->csum_bytes_left = disk_len; entry->disk_len = disk_len; entry->bytes_left = len; entry->inode = igrab(inode); @@ -286,10 +283,6 @@ void btrfs_add_ordered_sum(struct inode *inode, tree = &BTRFS_I(inode)->ordered_tree; spin_lock_irq(&tree->lock); list_add_tail(&sum->list, &entry->list); - WARN_ON(entry->csum_bytes_left < sum->len); - entry->csum_bytes_left -= sum->len; - if (entry->csum_bytes_left == 0) - wake_up(&entry->wait); spin_unlock_irq(&tree->lock); } @@ -509,7 +502,21 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); - list_add_tail(&ordered->trans_list, &trans->ordered); + /* + * If our ordered extent completed it means it updated the + * fs/subvol and csum trees already, so no need to make the + * current transaction's commit wait for it, as we end up + * holding memory unnecessarily and delaying the inode's iput + * until the transaction commit (we schedule an iput for the + * inode when the ordered extent's refcount drops to 0), which + * prevents it from being evictable until the transaction + * commits. + */ + if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) + btrfs_put_ordered_extent(ordered); + else + list_add_tail(&ordered->trans_list, &trans->ordered); + spin_lock_irq(&log->log_extents_lock[index]); } spin_unlock_irq(&log->log_extents_lock[index]); @@ -844,6 +851,20 @@ out: return entry; } +bool btrfs_have_ordered_extents_in_range(struct inode *inode, + u64 file_offset, + u64 len) +{ + struct btrfs_ordered_extent *oe; + + oe = btrfs_lookup_ordered_range(inode, file_offset, len); + if (oe) { + btrfs_put_ordered_extent(oe); + return true; + } + return false; +} + /* * lookup and return any extent before 'file_offset'. NULL is returned * if none is found diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e96cd4ccd805..7176cc0fe43f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -89,9 +89,6 @@ struct btrfs_ordered_extent { /* number of bytes that still need writing */ u64 bytes_left; - /* number of bytes that still need csumming */ - u64 csum_bytes_left; - /* * the end of the ordered extent which is behind it but * didn't update disk_i_size. Please see the comment of @@ -191,6 +188,9 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, u64 file_offset, u64 len); +bool btrfs_have_ordered_extents_in_range(struct inode *inode, + u64 file_offset, + u64 len); int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3d6546581bb9..d5f1f033b7a0 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -34,6 +34,7 @@ #include "extent_io.h" #include "qgroup.h" + /* TODO XXX FIXME * - subvol delete -> delete when ref goes to 0? delete limits also? * - reorganize keys @@ -84,11 +85,42 @@ struct btrfs_qgroup { /* * temp variables for accounting operations + * Refer to qgroup_shared_accouting() for details. */ u64 old_refcnt; u64 new_refcnt; }; +static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, + int mod) +{ + if (qg->old_refcnt < seq) + qg->old_refcnt = seq; + qg->old_refcnt += mod; +} + +static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, + int mod) +{ + if (qg->new_refcnt < seq) + qg->new_refcnt = seq; + qg->new_refcnt += mod; +} + +static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) +{ + if (qg->old_refcnt < seq) + return 0; + return qg->old_refcnt - seq; +} + +static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) +{ + if (qg->new_refcnt < seq) + return 0; + return qg->new_refcnt - seq; +} + /* * glue structure to represent the relations between qgroups. */ @@ -1115,14 +1147,14 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct ulist *tmp; int ret = 0; - tmp = ulist_alloc(GFP_NOFS); - if (!tmp) - return -ENOMEM; - /* Check the level of src and dst first */ if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) return -EINVAL; + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + return -ENOMEM; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; if (!quota_root) { @@ -1356,239 +1388,86 @@ out: return ret; } -static int comp_oper_exist(struct btrfs_qgroup_operation *oper1, - struct btrfs_qgroup_operation *oper2) +int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { - /* - * Ignore seq and type here, we're looking for any operation - * at all related to this extent on that root. - */ - if (oper1->bytenr < oper2->bytenr) - return -1; - if (oper1->bytenr > oper2->bytenr) - return 1; - if (oper1->ref_root < oper2->ref_root) - return -1; - if (oper1->ref_root > oper2->ref_root) - return 1; - return 0; -} + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + struct rb_node *node; + u64 qgroup_to_skip; + int ret = 0; -static int qgroup_oper_exists(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) -{ - struct rb_node *n; - struct btrfs_qgroup_operation *cur; - int cmp; + delayed_refs = &trans->transaction->delayed_refs; + qgroup_to_skip = delayed_refs->qgroup_to_skip; - spin_lock(&fs_info->qgroup_op_lock); - n = fs_info->qgroup_op_tree.rb_node; - while (n) { - cur = rb_entry(n, struct btrfs_qgroup_operation, n); - cmp = comp_oper_exist(cur, oper); - if (cmp < 0) { - n = n->rb_right; - } else if (cmp) { - n = n->rb_left; - } else { - spin_unlock(&fs_info->qgroup_op_lock); - return -EEXIST; - } + /* + * No need to do lock, since this function will only be called in + * btrfs_commmit_transaction(). + */ + node = rb_first(&delayed_refs->dirty_extent_root); + while (node) { + record = rb_entry(node, struct btrfs_qgroup_extent_record, + node); + ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, + &record->old_roots); + if (ret < 0) + break; + if (qgroup_to_skip) + ulist_del(record->old_roots, qgroup_to_skip, 0); + node = rb_next(node); } - spin_unlock(&fs_info->qgroup_op_lock); - return 0; -} - -static int comp_oper(struct btrfs_qgroup_operation *oper1, - struct btrfs_qgroup_operation *oper2) -{ - if (oper1->bytenr < oper2->bytenr) - return -1; - if (oper1->bytenr > oper2->bytenr) - return 1; - if (oper1->ref_root < oper2->ref_root) - return -1; - if (oper1->ref_root > oper2->ref_root) - return 1; - if (oper1->seq < oper2->seq) - return -1; - if (oper1->seq > oper2->seq) - return 1; - if (oper1->type < oper2->type) - return -1; - if (oper1->type > oper2->type) - return 1; - return 0; + return ret; } -static int insert_qgroup_oper(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) +struct btrfs_qgroup_extent_record +*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record) { - struct rb_node **p; - struct rb_node *parent = NULL; - struct btrfs_qgroup_operation *cur; - int cmp; + struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; + struct rb_node *parent_node = NULL; + struct btrfs_qgroup_extent_record *entry; + u64 bytenr = record->bytenr; - spin_lock(&fs_info->qgroup_op_lock); - p = &fs_info->qgroup_op_tree.rb_node; while (*p) { - parent = *p; - cur = rb_entry(parent, struct btrfs_qgroup_operation, n); - cmp = comp_oper(cur, oper); - if (cmp < 0) { - p = &(*p)->rb_right; - } else if (cmp) { + parent_node = *p; + entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, + node); + if (bytenr < entry->bytenr) p = &(*p)->rb_left; - } else { - spin_unlock(&fs_info->qgroup_op_lock); - return -EEXIST; - } - } - rb_link_node(&oper->n, parent, p); - rb_insert_color(&oper->n, &fs_info->qgroup_op_tree); - spin_unlock(&fs_info->qgroup_op_lock); - return 0; -} - -/* - * Record a quota operation for processing later on. - * @trans: the transaction we are adding the delayed op to. - * @fs_info: the fs_info for this fs. - * @ref_root: the root of the reference we are acting on, - * @bytenr: the bytenr we are acting on. - * @num_bytes: the number of bytes in the reference. - * @type: the type of operation this is. - * @mod_seq: do we need to get a sequence number for looking up roots. - * - * We just add it to our trans qgroup_ref_list and carry on and process these - * operations in order at some later point. If the reference root isn't a fs - * root then we don't bother with doing anything. - * - * MUST BE HOLDING THE REF LOCK. - */ -int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 ref_root, - u64 bytenr, u64 num_bytes, - enum btrfs_qgroup_operation_type type, int mod_seq) -{ - struct btrfs_qgroup_operation *oper; - int ret; - - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - return 0; - - oper = kmalloc(sizeof(*oper), GFP_NOFS); - if (!oper) - return -ENOMEM; - - oper->ref_root = ref_root; - oper->bytenr = bytenr; - oper->num_bytes = num_bytes; - oper->type = type; - oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); - INIT_LIST_HEAD(&oper->elem.list); - oper->elem.seq = 0; - - trace_btrfs_qgroup_record_ref(oper); - - if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { - /* - * If any operation for this bytenr/ref_root combo - * exists, then we know it's not exclusively owned and - * shouldn't be queued up. - * - * This also catches the case where we have a cloned - * extent that gets queued up multiple times during - * drop snapshot. - */ - if (qgroup_oper_exists(fs_info, oper)) { - kfree(oper); - return 0; - } - } - - ret = insert_qgroup_oper(fs_info, oper); - if (ret) { - /* Shouldn't happen so have an assert for developers */ - ASSERT(0); - kfree(oper); - return ret; + else if (bytenr > entry->bytenr) + p = &(*p)->rb_right; + else + return entry; } - list_add_tail(&oper->list, &trans->qgroup_ref_list); - if (mod_seq) - btrfs_get_tree_mod_seq(fs_info, &oper->elem); - - return 0; -} - -static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) -{ - struct ulist *tmp; - int sign = 0; - int ret = 0; - - tmp = ulist_alloc(GFP_NOFS); - if (!tmp) - return -ENOMEM; - - spin_lock(&fs_info->qgroup_lock); - if (!fs_info->quota_root) - goto out; - - switch (oper->type) { - case BTRFS_QGROUP_OPER_ADD_EXCL: - sign = 1; - break; - case BTRFS_QGROUP_OPER_SUB_EXCL: - sign = -1; - break; - default: - ASSERT(0); - } - ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root, - oper->num_bytes, sign); -out: - spin_unlock(&fs_info->qgroup_lock); - ulist_free(tmp); - return ret; + rb_link_node(&record->node, parent_node, p); + rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); + return NULL; } +#define UPDATE_NEW 0 +#define UPDATE_OLD 1 /* - * Walk all of the roots that pointed to our bytenr and adjust their refcnts as - * properly. + * Walk all of the roots that points to the bytenr and adjust their refcnts. */ -static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, - u64 root_to_skip, struct ulist *tmp, - struct ulist *roots, struct ulist *qgroups, - u64 seq, int *old_roots, int rescan) +static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, + struct ulist *roots, struct ulist *tmp, + struct ulist *qgroups, u64 seq, int update_old) { struct ulist_node *unode; struct ulist_iterator uiter; struct ulist_node *tmp_unode; struct ulist_iterator tmp_uiter; struct btrfs_qgroup *qg; - int ret; + int ret = 0; + if (!roots) + return 0; ULIST_ITER_INIT(&uiter); while ((unode = ulist_next(roots, &uiter))) { - /* We don't count our current root here */ - if (unode->val == root_to_skip) - continue; qg = find_qgroup_rb(fs_info, unode->val); if (!qg) continue; - /* - * We could have a pending removal of this same ref so we may - * not have actually found our ref root when doing - * btrfs_find_all_roots, so we need to keep track of how many - * old roots we find in case we removed ours and added a - * different one at the same time. I don't think this could - * happen in practice but that sort of thinking leads to pain - * and suffering and to the dark side. - */ - (*old_roots)++; ulist_reinit(tmp); ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), @@ -1603,29 +1482,10 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_list *glist; qg = u64_to_ptr(tmp_unode->aux); - /* - * We use this sequence number to keep from having to - * run the whole list and 0 out the refcnt every time. - * We basically use sequnce as the known 0 count and - * then add 1 everytime we see a qgroup. This is how we - * get how many of the roots actually point up to the - * upper level qgroups in order to determine exclusive - * counts. - * - * For rescan we want to set old_refcnt to seq so our - * exclusive calculations end up correct. - */ - if (rescan) - qg->old_refcnt = seq; - else if (qg->old_refcnt < seq) - qg->old_refcnt = seq + 1; + if (update_old) + btrfs_qgroup_update_old_refcnt(qg, seq, 1); else - qg->old_refcnt++; - - if (qg->new_refcnt < seq) - qg->new_refcnt = seq + 1; - else - qg->new_refcnt++; + btrfs_qgroup_update_new_refcnt(qg, seq, 1); list_for_each_entry(glist, &qg->groups, next_group) { ret = ulist_add(qgroups, glist->group->qgroupid, ptr_to_u64(glist->group), @@ -1644,161 +1504,46 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, } /* - * We need to walk forward in our operation tree and account for any roots that - * were deleted after we made this operation. - */ -static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper, - struct ulist *tmp, - struct ulist *qgroups, u64 seq, - int *old_roots) -{ - struct ulist_node *unode; - struct ulist_iterator uiter; - struct btrfs_qgroup *qg; - struct btrfs_qgroup_operation *tmp_oper; - struct rb_node *n; - int ret; - - ulist_reinit(tmp); - - /* - * We only walk forward in the tree since we're only interested in - * removals that happened _after_ our operation. - */ - spin_lock(&fs_info->qgroup_op_lock); - n = rb_next(&oper->n); - spin_unlock(&fs_info->qgroup_op_lock); - if (!n) - return 0; - tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); - while (tmp_oper->bytenr == oper->bytenr) { - /* - * If it's not a removal we don't care, additions work out - * properly with our refcnt tracking. - */ - if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && - tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) - goto next; - qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); - if (!qg) - goto next; - ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), - GFP_ATOMIC); - if (ret) { - if (ret < 0) - return ret; - /* - * We only want to increase old_roots if this qgroup is - * not already in the list of qgroups. If it is already - * there then that means it must have been re-added or - * the delete will be discarded because we had an - * existing ref that we haven't looked up yet. In this - * case we don't want to increase old_roots. So if ret - * == 1 then we know that this is the first time we've - * seen this qgroup and we can bump the old_roots. - */ - (*old_roots)++; - ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), - GFP_ATOMIC); - if (ret < 0) - return ret; - } -next: - spin_lock(&fs_info->qgroup_op_lock); - n = rb_next(&tmp_oper->n); - spin_unlock(&fs_info->qgroup_op_lock); - if (!n) - break; - tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); - } - - /* Ok now process the qgroups we found */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(tmp, &uiter))) { - struct btrfs_qgroup_list *glist; - - qg = u64_to_ptr(unode->aux); - if (qg->old_refcnt < seq) - qg->old_refcnt = seq + 1; - else - qg->old_refcnt++; - if (qg->new_refcnt < seq) - qg->new_refcnt = seq + 1; - else - qg->new_refcnt++; - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(qgroups, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); - if (ret < 0) - return ret; - ret = ulist_add(tmp, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); - if (ret < 0) - return ret; - } - } - return 0; -} - -/* Add refcnt for the newly added reference. */ -static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper, - struct btrfs_qgroup *qgroup, - struct ulist *tmp, struct ulist *qgroups, - u64 seq) -{ - struct ulist_node *unode; - struct ulist_iterator uiter; - struct btrfs_qgroup *qg; - int ret; - - ulist_reinit(tmp); - ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup), - GFP_ATOMIC); - if (ret < 0) - return ret; - ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup), - GFP_ATOMIC); - if (ret < 0) - return ret; - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(tmp, &uiter))) { - struct btrfs_qgroup_list *glist; - - qg = u64_to_ptr(unode->aux); - if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { - if (qg->new_refcnt < seq) - qg->new_refcnt = seq + 1; - else - qg->new_refcnt++; - } else { - if (qg->old_refcnt < seq) - qg->old_refcnt = seq + 1; - else - qg->old_refcnt++; - } - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); - if (ret < 0) - return ret; - ret = ulist_add(qgroups, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); - if (ret < 0) - return ret; - } - } - return 0; -} - -/* - * This adjusts the counters for all referenced qgroups if need be. + * Update qgroup rfer/excl counters. + * Rfer update is easy, codes can explain themselves. + * + * Excl update is tricky, the update is split into 2 part. + * Part 1: Possible exclusive <-> sharing detect: + * | A | !A | + * ------------------------------------- + * B | * | - | + * ------------------------------------- + * !B | + | ** | + * ------------------------------------- + * + * Conditions: + * A: cur_old_roots < nr_old_roots (not exclusive before) + * !A: cur_old_roots == nr_old_roots (possible exclusive before) + * B: cur_new_roots < nr_new_roots (not exclusive now) + * !B: cur_new_roots == nr_new_roots (possible exclsuive now) + * + * Results: + * +: Possible sharing -> exclusive -: Possible exclusive -> sharing + * *: Definitely not changed. **: Possible unchanged. + * + * For !A and !B condition, the exception is cur_old/new_roots == 0 case. + * + * To make the logic clear, we first use condition A and B to split + * combination into 4 results. + * + * Then, for result "+" and "-", check old/new_roots == 0 case, as in them + * only on variant maybe 0. + * + * Lastly, check result **, since there are 2 variants maybe 0, split them + * again(2x2). + * But this time we don't need to consider other things, the codes and logic + * is easy to understand now. */ -static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, - u64 root_to_skip, u64 num_bytes, - struct ulist *qgroups, u64 seq, - int old_roots, int new_roots, int rescan) +static int qgroup_update_counters(struct btrfs_fs_info *fs_info, + struct ulist *qgroups, + u64 nr_old_roots, + u64 nr_new_roots, + u64 num_bytes, u64 seq) { struct ulist_node *unode; struct ulist_iterator uiter; @@ -1810,423 +1555,191 @@ static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, bool dirty = false; qg = u64_to_ptr(unode->aux); - /* - * Wasn't referenced before but is now, add to the reference - * counters. - */ - if (qg->old_refcnt <= seq && qg->new_refcnt > seq) { + cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); + cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); + + /* Rfer update part */ + if (cur_old_count == 0 && cur_new_count > 0) { qg->rfer += num_bytes; qg->rfer_cmpr += num_bytes; dirty = true; } - - /* - * Was referenced before but isn't now, subtract from the - * reference counters. - */ - if (qg->old_refcnt > seq && qg->new_refcnt <= seq) { + if (cur_old_count > 0 && cur_new_count == 0) { qg->rfer -= num_bytes; qg->rfer_cmpr -= num_bytes; dirty = true; } - if (qg->old_refcnt < seq) - cur_old_count = 0; - else - cur_old_count = qg->old_refcnt - seq; - if (qg->new_refcnt < seq) - cur_new_count = 0; - else - cur_new_count = qg->new_refcnt - seq; - - /* - * If our refcount was the same as the roots previously but our - * new count isn't the same as the number of roots now then we - * went from having a exclusive reference on this range to not. - */ - if (old_roots && cur_old_count == old_roots && - (cur_new_count != new_roots || new_roots == 0)) { - WARN_ON(cur_new_count != new_roots && new_roots == 0); - qg->excl -= num_bytes; - qg->excl_cmpr -= num_bytes; - dirty = true; + /* Excl update part */ + /* Exclusive/none -> shared case */ + if (cur_old_count == nr_old_roots && + cur_new_count < nr_new_roots) { + /* Exclusive -> shared */ + if (cur_old_count != 0) { + qg->excl -= num_bytes; + qg->excl_cmpr -= num_bytes; + dirty = true; + } } - /* - * If we didn't reference all the roots before but now we do we - * have an exclusive reference to this range. - */ - if ((!old_roots || (old_roots && cur_old_count != old_roots)) - && cur_new_count == new_roots) { - qg->excl += num_bytes; - qg->excl_cmpr += num_bytes; - dirty = true; + /* Shared -> exclusive/none case */ + if (cur_old_count < nr_old_roots && + cur_new_count == nr_new_roots) { + /* Shared->exclusive */ + if (cur_new_count != 0) { + qg->excl += num_bytes; + qg->excl_cmpr += num_bytes; + dirty = true; + } } + /* Exclusive/none -> exclusive/none case */ + if (cur_old_count == nr_old_roots && + cur_new_count == nr_new_roots) { + if (cur_old_count == 0) { + /* None -> exclusive/none */ + + if (cur_new_count != 0) { + /* None -> exclusive */ + qg->excl += num_bytes; + qg->excl_cmpr += num_bytes; + dirty = true; + } + /* None -> none, nothing changed */ + } else { + /* Exclusive -> exclusive/none */ + + if (cur_new_count == 0) { + /* Exclusive -> none */ + qg->excl -= num_bytes; + qg->excl_cmpr -= num_bytes; + dirty = true; + } + /* Exclusive -> exclusive, nothing changed */ + } + } if (dirty) qgroup_dirty(fs_info, qg); } return 0; } -/* - * If we removed a data extent and there were other references for that bytenr - * then we need to lookup all referenced roots to make sure we still don't - * reference this bytenr. If we do then we can just discard this operation. - */ -static int check_existing_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) -{ - struct ulist *roots = NULL; - struct ulist_node *unode; - struct ulist_iterator uiter; - int ret = 0; - - ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, - oper->elem.seq, &roots); - if (ret < 0) - return ret; - ret = 0; - - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - if (unode->val == oper->ref_root) { - ret = 1; - break; - } - } - ulist_free(roots); - btrfs_put_tree_mod_seq(fs_info, &oper->elem); - - return ret; -} - -/* - * If we share a reference across multiple roots then we may need to adjust - * various qgroups referenced and exclusive counters. The basic premise is this - * - * 1) We have seq to represent a 0 count. Instead of looping through all of the - * qgroups and resetting their refcount to 0 we just constantly bump this - * sequence number to act as the base reference count. This means that if - * anybody is equal to or below this sequence they were never referenced. We - * jack this sequence up by the number of roots we found each time in order to - * make sure we don't have any overlap. - * - * 2) We first search all the roots that reference the area _except_ the root - * we're acting on currently. This makes up the old_refcnt of all the qgroups - * before. - * - * 3) We walk all of the qgroups referenced by the root we are currently acting - * on, and will either adjust old_refcnt in the case of a removal or the - * new_refcnt in the case of an addition. - * - * 4) Finally we walk all the qgroups that are referenced by this range - * including the root we are acting on currently. We will adjust the counters - * based on the number of roots we had and will have after this operation. - * - * Take this example as an illustration - * - * [qgroup 1/0] - * / | \ - * [qg 0/0] [qg 0/1] [qg 0/2] - * \ | / - * [ extent ] - * - * Say we are adding a reference that is covered by qg 0/0. The first step - * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with - * old_roots being 2. Because it is adding new_roots will be 1. We then go - * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's - * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we - * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a - * reference and thus must add the size to the referenced bytes. Everything - * else is the same so nothing else changes. - */ -static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) +int +btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes, + struct ulist *old_roots, struct ulist *new_roots) { - struct ulist *roots = NULL; - struct ulist *qgroups, *tmp; - struct btrfs_qgroup *qgroup; - struct seq_list elem = SEQ_LIST_INIT(elem); + struct ulist *qgroups = NULL; + struct ulist *tmp = NULL; u64 seq; - int old_roots = 0; - int new_roots = 0; + u64 nr_new_roots = 0; + u64 nr_old_roots = 0; int ret = 0; - if (oper->elem.seq) { - ret = check_existing_refs(trans, fs_info, oper); - if (ret < 0) - return ret; - if (ret) - return 0; - } + if (new_roots) + nr_new_roots = new_roots->nnodes; + if (old_roots) + nr_old_roots = old_roots->nnodes; - qgroups = ulist_alloc(GFP_NOFS); - if (!qgroups) - return -ENOMEM; + if (!fs_info->quota_enabled) + goto out_free; + BUG_ON(!fs_info->quota_root); + qgroups = ulist_alloc(GFP_NOFS); + if (!qgroups) { + ret = -ENOMEM; + goto out_free; + } tmp = ulist_alloc(GFP_NOFS); if (!tmp) { - ulist_free(qgroups); - return -ENOMEM; + ret = -ENOMEM; + goto out_free; } - btrfs_get_tree_mod_seq(fs_info, &elem); - ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, - &roots); - btrfs_put_tree_mod_seq(fs_info, &elem); - if (ret < 0) { - ulist_free(qgroups); - ulist_free(tmp); - return ret; + mutex_lock(&fs_info->qgroup_rescan_lock); + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { + if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { + mutex_unlock(&fs_info->qgroup_rescan_lock); + ret = 0; + goto out_free; + } } + mutex_unlock(&fs_info->qgroup_rescan_lock); + spin_lock(&fs_info->qgroup_lock); - qgroup = find_qgroup_rb(fs_info, oper->ref_root); - if (!qgroup) - goto out; seq = fs_info->qgroup_seq; - /* - * So roots is the list of all the roots currently pointing at the - * bytenr, including the ref we are adding if we are adding, or not if - * we are removing a ref. So we pass in the ref_root to skip that root - * in our calculations. We set old_refnct and new_refcnt cause who the - * hell knows what everything looked like before, and it doesn't matter - * except... - */ - ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups, - seq, &old_roots, 0); + /* Update old refcnts using old_roots */ + ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, + UPDATE_OLD); if (ret < 0) goto out; - /* - * Now adjust the refcounts of the qgroups that care about this - * reference, either the old_count in the case of removal or new_count - * in the case of an addition. - */ - ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups, - seq); + /* Update new refcnts using new_roots */ + ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, + UPDATE_NEW); if (ret < 0) goto out; - /* - * ...in the case of removals. If we had a removal before we got around - * to processing this operation then we need to find that guy and count - * his references as if they really existed so we don't end up screwing - * up the exclusive counts. Then whenever we go to process the delete - * everything will be grand and we can account for whatever exclusive - * changes need to be made there. We also have to pass in old_roots so - * we have an accurate count of the roots as it pertains to this - * operations view of the world. - */ - ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, - &old_roots); - if (ret < 0) - goto out; + qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, + num_bytes, seq); /* - * We are adding our root, need to adjust up the number of roots, - * otherwise old_roots is the number of roots we want. + * Bump qgroup_seq to avoid seq overlap */ - if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { - new_roots = old_roots + 1; - } else { - new_roots = old_roots; - old_roots++; - } - fs_info->qgroup_seq += old_roots + 1; - - - /* - * And now the magic happens, bless Arne for having a pretty elegant - * solution for this. - */ - qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, - qgroups, seq, old_roots, new_roots, 0); + fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; out: spin_unlock(&fs_info->qgroup_lock); - ulist_free(qgroups); - ulist_free(roots); +out_free: ulist_free(tmp); + ulist_free(qgroups); + ulist_free(old_roots); + ulist_free(new_roots); return ret; } -/* - * Process a reference to a shared subtree. This type of operation is - * queued during snapshot removal when we encounter extents which are - * shared between more than one root. - */ -static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) -{ - struct ulist *roots = NULL; - struct ulist_node *unode; - struct ulist_iterator uiter; - struct btrfs_qgroup_list *glist; - struct ulist *parents; - int ret = 0; - int err; - struct btrfs_qgroup *qg; - u64 root_obj = 0; - struct seq_list elem = SEQ_LIST_INIT(elem); - - parents = ulist_alloc(GFP_NOFS); - if (!parents) - return -ENOMEM; - - btrfs_get_tree_mod_seq(fs_info, &elem); - ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, - elem.seq, &roots); - btrfs_put_tree_mod_seq(fs_info, &elem); - if (ret < 0) - goto out; - - if (roots->nnodes != 1) - goto out; - - ULIST_ITER_INIT(&uiter); - unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ - /* - * If we find our ref root then that means all refs - * this extent has to the root have not yet been - * deleted. In that case, we do nothing and let the - * last ref for this bytenr drive our update. - * - * This can happen for example if an extent is - * referenced multiple times in a snapshot (clone, - * etc). If we are in the middle of snapshot removal, - * queued updates for such an extent will find the - * root if we have not yet finished removing the - * snapshot. - */ - if (unode->val == oper->ref_root) - goto out; - - root_obj = unode->val; - BUG_ON(!root_obj); - - spin_lock(&fs_info->qgroup_lock); - qg = find_qgroup_rb(fs_info, root_obj); - if (!qg) - goto out_unlock; - - qg->excl += oper->num_bytes; - qg->excl_cmpr += oper->num_bytes; - qgroup_dirty(fs_info, qg); - - /* - * Adjust counts for parent groups. First we find all - * parents, then in the 2nd loop we do the adjustment - * while adding parents of the parents to our ulist. - */ - list_for_each_entry(glist, &qg->groups, next_group) { - err = ulist_add(parents, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); - if (err < 0) { - ret = err; - goto out_unlock; - } - } - - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(parents, &uiter))) { - qg = u64_to_ptr(unode->aux); - qg->excl += oper->num_bytes; - qg->excl_cmpr += oper->num_bytes; - qgroup_dirty(fs_info, qg); - - /* Add any parents of the parents */ - list_for_each_entry(glist, &qg->groups, next_group) { - err = ulist_add(parents, glist->group->qgroupid, - ptr_to_u64(glist->group), GFP_ATOMIC); - if (err < 0) { - ret = err; - goto out_unlock; - } - } - } - -out_unlock: - spin_unlock(&fs_info->qgroup_lock); - -out: - ulist_free(roots); - ulist_free(parents); - return ret; -} - -/* - * btrfs_qgroup_account_ref is called for every ref that is added to or deleted - * from the fs. First, all roots referencing the extent are searched, and - * then the space is accounted accordingly to the different roots. The - * accounting algorithm works in 3 steps documented inline. - */ -static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper) +int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + struct ulist *new_roots = NULL; + struct rb_node *node; + u64 qgroup_to_skip; int ret = 0; - if (!fs_info->quota_enabled) - return 0; - - BUG_ON(!fs_info->quota_root); + delayed_refs = &trans->transaction->delayed_refs; + qgroup_to_skip = delayed_refs->qgroup_to_skip; + while ((node = rb_first(&delayed_refs->dirty_extent_root))) { + record = rb_entry(node, struct btrfs_qgroup_extent_record, + node); - mutex_lock(&fs_info->qgroup_rescan_lock); - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { - if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { - mutex_unlock(&fs_info->qgroup_rescan_lock); - return 0; + if (!ret) { + /* + * Use (u64)-1 as time_seq to do special search, which + * doesn't lock tree or delayed_refs and search current + * root. It's safe inside commit_transaction(). + */ + ret = btrfs_find_all_roots(trans, fs_info, + record->bytenr, (u64)-1, &new_roots); + if (ret < 0) + goto cleanup; + if (qgroup_to_skip) + ulist_del(new_roots, qgroup_to_skip, 0); + ret = btrfs_qgroup_account_extent(trans, fs_info, + record->bytenr, record->num_bytes, + record->old_roots, new_roots); + record->old_roots = NULL; + new_roots = NULL; } - } - mutex_unlock(&fs_info->qgroup_rescan_lock); +cleanup: + ulist_free(record->old_roots); + ulist_free(new_roots); + new_roots = NULL; + rb_erase(node, &delayed_refs->dirty_extent_root); + kfree(record); - ASSERT(is_fstree(oper->ref_root)); - - trace_btrfs_qgroup_account(oper); - - switch (oper->type) { - case BTRFS_QGROUP_OPER_ADD_EXCL: - case BTRFS_QGROUP_OPER_SUB_EXCL: - ret = qgroup_excl_accounting(fs_info, oper); - break; - case BTRFS_QGROUP_OPER_ADD_SHARED: - case BTRFS_QGROUP_OPER_SUB_SHARED: - ret = qgroup_shared_accounting(trans, fs_info, oper); - break; - case BTRFS_QGROUP_OPER_SUB_SUBTREE: - ret = qgroup_subtree_accounting(trans, fs_info, oper); - break; - default: - ASSERT(0); - } - return ret; -} - -/* - * Needs to be called everytime we run delayed refs, even if there is an error - * in order to cleanup outstanding operations. - */ -int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_qgroup_operation *oper; - int ret = 0; - - while (!list_empty(&trans->qgroup_ref_list)) { - oper = list_first_entry(&trans->qgroup_ref_list, - struct btrfs_qgroup_operation, list); - list_del_init(&oper->list); - if (!ret || !trans->aborted) - ret = btrfs_qgroup_account(trans, fs_info, oper); - spin_lock(&fs_info->qgroup_op_lock); - rb_erase(&oper->n, &fs_info->qgroup_op_tree); - spin_unlock(&fs_info->qgroup_op_lock); - btrfs_put_tree_mod_seq(fs_info, &oper->elem); - kfree(oper); } return ret; } @@ -2637,15 +2150,13 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) */ static int qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, - struct btrfs_trans_handle *trans, struct ulist *qgroups, - struct ulist *tmp, struct extent_buffer *scratch_leaf) + struct btrfs_trans_handle *trans, + struct extent_buffer *scratch_leaf) { struct btrfs_key found; struct ulist *roots = NULL; struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); u64 num_bytes; - u64 seq; - int new_roots; int slot; int ret; @@ -2695,33 +2206,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, else num_bytes = found.offset; - ulist_reinit(qgroups); ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, &roots); if (ret < 0) goto out; - spin_lock(&fs_info->qgroup_lock); - seq = fs_info->qgroup_seq; - fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ - - new_roots = 0; - ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, - seq, &new_roots, 1); - if (ret < 0) { - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); - goto out; - } - - ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, - seq, 0, new_roots, 1); - if (ret < 0) { - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); + /* For rescan, just pass old_roots as NULL */ + ret = btrfs_qgroup_account_extent(trans, fs_info, + found.objectid, num_bytes, NULL, roots); + if (ret < 0) goto out; - } - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); } out: btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); @@ -2735,7 +2228,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) qgroup_rescan_work); struct btrfs_path *path; struct btrfs_trans_handle *trans = NULL; - struct ulist *tmp = NULL, *qgroups = NULL; struct extent_buffer *scratch_leaf = NULL; int err = -ENOMEM; int ret = 0; @@ -2743,12 +2235,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) path = btrfs_alloc_path(); if (!path) goto out; - qgroups = ulist_alloc(GFP_NOFS); - if (!qgroups) - goto out; - tmp = ulist_alloc(GFP_NOFS); - if (!tmp) - goto out; scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); if (!scratch_leaf) goto out; @@ -2764,7 +2250,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) err = -EINTR; } else { err = qgroup_rescan_leaf(fs_info, path, trans, - qgroups, tmp, scratch_leaf); + scratch_leaf); } if (err > 0) btrfs_commit_transaction(trans, fs_info->fs_root); @@ -2774,8 +2260,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) out: kfree(scratch_leaf); - ulist_free(qgroups); - ulist_free(tmp); btrfs_free_path(path); mutex_lock(&fs_info->qgroup_rescan_lock); diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index c5242aa9a4b2..6387dcfa354c 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -19,43 +19,18 @@ #ifndef __BTRFS_QGROUP__ #define __BTRFS_QGROUP__ +#include "ulist.h" +#include "delayed-ref.h" + /* - * A description of the operations, all of these operations only happen when we - * are adding the 1st reference for that subvolume in the case of adding space - * or on the last reference delete in the case of subtraction. The only - * exception is the last one, which is added for confusion. - * - * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only - * one pointing at the bytes we are adding. This is called on the first - * allocation. - * - * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be - * shared between subvols. This is called on the creation of a ref that already - * has refs from a different subvolume, so basically reflink. - * - * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only - * one referencing the range. - * - * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with - * refs with other subvolumes. + * Record a dirty extent, and info qgroup to update quota on it + * TODO: Use kmem cache to alloc it. */ -enum btrfs_qgroup_operation_type { - BTRFS_QGROUP_OPER_ADD_EXCL, - BTRFS_QGROUP_OPER_ADD_SHARED, - BTRFS_QGROUP_OPER_SUB_EXCL, - BTRFS_QGROUP_OPER_SUB_SHARED, - BTRFS_QGROUP_OPER_SUB_SUBTREE, -}; - -struct btrfs_qgroup_operation { - u64 ref_root; +struct btrfs_qgroup_extent_record { + struct rb_node node; u64 bytenr; u64 num_bytes; - u64 seq; - enum btrfs_qgroup_operation_type type; - struct seq_list elem; - struct rb_node n; - struct list_head list; + struct ulist *old_roots; }; int btrfs_quota_enable(struct btrfs_trans_handle *trans, @@ -79,16 +54,18 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); struct btrfs_delayed_extent_op; -int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 ref_root, +int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); +struct btrfs_qgroup_extent_record +*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record); +int +btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, - enum btrfs_qgroup_operation_type type, - int mod_seq); -int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info); -void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_operation *oper); + struct ulist *old_roots, struct ulist *new_roots); +int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info); int btrfs_run_qgroups(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 74b24b01d574..827951fbf7fc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1847,8 +1847,10 @@ again: } eb = read_tree_block(dest, old_bytenr, old_ptr_gen); - if (!eb || !extent_buffer_uptodate(eb)) { - ret = (!eb) ? -ENOMEM : -EIO; + if (IS_ERR(eb)) { + ret = PTR_ERR(eb); + } else if (!extent_buffer_uptodate(eb)) { + ret = -EIO; free_extent_buffer(eb); break; } @@ -2002,7 +2004,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, bytenr = btrfs_node_blockptr(eb, path->slots[i]); eb = read_tree_block(root, bytenr, ptr_gen); - if (!eb || !extent_buffer_uptodate(eb)) { + if (IS_ERR(eb)) { + return PTR_ERR(eb); + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; } @@ -2710,7 +2714,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = root->nodesize; generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, generation); - if (!eb || !extent_buffer_uptodate(eb)) { + if (IS_ERR(eb)) { + err = PTR_ERR(eb); + goto next; + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); err = -EIO; goto next; @@ -2873,7 +2880,9 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, block->key.offset); - if (!eb || !extent_buffer_uptodate(eb)) { + if (IS_ERR(eb)) { + return PTR_ERR(eb); + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ab5811545a98..9f2feabe99f2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2662,18 +2662,30 @@ static void scrub_free_parity(struct scrub_parity *sparity) kfree(sparity); } +static void scrub_parity_bio_endio_worker(struct btrfs_work *work) +{ + struct scrub_parity *sparity = container_of(work, struct scrub_parity, + work); + struct scrub_ctx *sctx = sparity->sctx; + + scrub_free_parity(sparity); + scrub_pending_bio_dec(sctx); +} + static void scrub_parity_bio_endio(struct bio *bio, int error) { struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; - struct scrub_ctx *sctx = sparity->sctx; if (error) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); - scrub_free_parity(sparity); - scrub_pending_bio_dec(sctx); bio_put(bio); + + btrfs_init_work(&sparity->work, btrfs_scrubparity_helper, + scrub_parity_bio_endio_worker, NULL, NULL); + btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers, + &sparity->work); } static void scrub_parity_check_and_repair(struct scrub_parity *sparity) @@ -3589,6 +3601,13 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, ret = -ENOMEM; goto out; } + fs_info->scrub_parity_workers = + btrfs_alloc_workqueue("btrfs-scrubparity", flags, + max_active, 2); + if (!fs_info->scrub_parity_workers) { + ret = -ENOMEM; + goto out; + } } ++fs_info->scrub_workers_refcnt; out: @@ -3601,6 +3620,7 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->scrub_workers); btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); + btrfs_destroy_workqueue(fs_info->scrub_parity_workers); } WARN_ON(fs_info->scrub_workers_refcnt < 0); } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a1216f9b4917..aa72bfd28f7d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -243,6 +243,7 @@ struct waiting_dir_move { * after this directory is moved, we can try to rmdir the ino rmdir_ino. */ u64 rmdir_ino; + bool orphanized; }; struct orphan_dir_info { @@ -1158,6 +1159,9 @@ struct backref_ctx { /* may be truncated in case it's the last extent in a file */ u64 extent_len; + /* data offset in the file extent item */ + u64 data_offset; + /* Just to check for bugs in backref resolving */ int found_itself; }; @@ -1221,7 +1225,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) if (ret < 0) return ret; - if (offset + bctx->extent_len > i_size) + if (offset + bctx->data_offset + bctx->extent_len > i_size) return 0; /* @@ -1363,6 +1367,19 @@ static int find_extent_clone(struct send_ctx *sctx, backref_ctx->cur_offset = data_offset; backref_ctx->found_itself = 0; backref_ctx->extent_len = num_bytes; + /* + * For non-compressed extents iterate_extent_inodes() gives us extent + * offsets that already take into account the data offset, but not for + * compressed extents, since the offset is logical and not relative to + * the physical extent locations. We must take this into account to + * avoid sending clone offsets that go beyond the source file's size, + * which would result in the clone ioctl failing with -EINVAL on the + * receiving end. + */ + if (compressed == BTRFS_COMPRESS_NONE) + backref_ctx->data_offset = 0; + else + backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi); /* * The last extent of a file may be too large due to page alignment. @@ -1900,8 +1917,13 @@ static int did_overwrite_ref(struct send_ctx *sctx, goto out; } - /* we know that it is or will be overwritten. check this now */ - if (ow_inode < sctx->send_progress) + /* + * We know that it is or will be overwritten. Check this now. + * The current inode being processed might have been the one that caused + * inode 'ino' to be orphanized, therefore ow_inode can actually be the + * same as sctx->send_progress. + */ + if (ow_inode <= sctx->send_progress) ret = 1; else ret = 0; @@ -2223,6 +2245,8 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, fs_path_reset(dest); while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { + struct waiting_dir_move *wdm; + fs_path_reset(name); if (is_waiting_for_rm(sctx, ino)) { @@ -2233,7 +2257,11 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, break; } - if (is_waiting_for_move(sctx, ino)) { + wdm = get_waiting_dir_move(sctx, ino); + if (wdm && wdm->orphanized) { + ret = gen_unique_name(sctx, ino, gen, name); + stop = 1; + } else if (wdm) { ret = get_first_ref(sctx->parent_root, ino, &parent_inode, &parent_gen, name); } else { @@ -2328,8 +2356,12 @@ static int send_subvol_begin(struct send_ctx *sctx) TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, le64_to_cpu(sctx->send_root->root_item.ctransid)); if (parent_root) { - TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, - sctx->parent_root->root_item.uuid); + if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid)) + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, + parent_root->root_item.received_uuid); + else + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, + parent_root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, le64_to_cpu(sctx->parent_root->root_item.ctransid)); } @@ -2923,7 +2955,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) return entry != NULL; } -static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) +static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) { struct rb_node **p = &sctx->waiting_dir_moves.rb_node; struct rb_node *parent = NULL; @@ -2934,6 +2966,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) return -ENOMEM; dm->ino = ino; dm->rmdir_ino = 0; + dm->orphanized = orphanized; while (*p) { parent = *p; @@ -3030,7 +3063,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, goto out; } - ret = add_waiting_dir_move(sctx, pm->ino); + ret = add_waiting_dir_move(sctx, pm->ino, is_orphan); if (ret) goto out; @@ -3353,8 +3386,40 @@ out: return ret; } +/* + * Check if ino ino1 is an ancestor of inode ino2 in the given root. + * Return 1 if true, 0 if false and < 0 on error. + */ +static int is_ancestor(struct btrfs_root *root, + const u64 ino1, + const u64 ino1_gen, + const u64 ino2, + struct fs_path *fs_path) +{ + u64 ino = ino2; + + while (ino > BTRFS_FIRST_FREE_OBJECTID) { + int ret; + u64 parent; + u64 parent_gen; + + fs_path_reset(fs_path); + ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); + if (ret < 0) { + if (ret == -ENOENT && ino == ino2) + ret = 0; + return ret; + } + if (parent == ino1) + return parent_gen == ino1_gen ? 1 : 0; + ino = parent; + } + return 0; +} + static int wait_for_parent_move(struct send_ctx *sctx, - struct recorded_ref *parent_ref) + struct recorded_ref *parent_ref, + const bool is_orphan) { int ret = 0; u64 ino = parent_ref->dir; @@ -3374,11 +3439,24 @@ static int wait_for_parent_move(struct send_ctx *sctx, * Our current directory inode may not yet be renamed/moved because some * ancestor (immediate or not) has to be renamed/moved first. So find if * such ancestor exists and make sure our own rename/move happens after - * that ancestor is processed. + * that ancestor is processed to avoid path build infinite loops (done + * at get_cur_path()). */ while (ino > BTRFS_FIRST_FREE_OBJECTID) { if (is_waiting_for_move(sctx, ino)) { - ret = 1; + /* + * If the current inode is an ancestor of ino in the + * parent root, we need to delay the rename of the + * current inode, otherwise don't delayed the rename + * because we can end up with a circular dependency + * of renames, resulting in some directories never + * getting the respective rename operations issued in + * the send stream or getting into infinite path build + * loops. + */ + ret = is_ancestor(sctx->parent_root, + sctx->cur_ino, sctx->cur_inode_gen, + ino, path_before); break; } @@ -3420,7 +3498,7 @@ out: ino, &sctx->new_refs, &sctx->deleted_refs, - false); + is_orphan); if (!ret) ret = 1; } @@ -3589,6 +3667,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } } + if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root && + can_rename) { + ret = wait_for_parent_move(sctx, cur, is_orphan); + if (ret < 0) + goto out; + if (ret == 1) { + can_rename = false; + *pending_move = 1; + } + } + /* * link/move the ref to the new place. If we have an orphan * inode, move it and update valid_path. If not, link or move @@ -3609,18 +3698,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * dirs, we always have one new and one deleted * ref. The deleted ref is ignored later. */ - ret = wait_for_parent_move(sctx, cur); - if (ret < 0) - goto out; - if (ret) { - *pending_move = 1; - } else { - ret = send_rename(sctx, valid_path, - cur->full_path); - if (!ret) - ret = fs_path_copy(valid_path, - cur->full_path); - } + ret = send_rename(sctx, valid_path, + cur->full_path); + if (!ret) + ret = fs_path_copy(valid_path, + cur->full_path); if (ret < 0) goto out; } else { @@ -4508,8 +4590,21 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " if (ret < 0) goto out; - TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, - clone_root->root->root_item.uuid); + /* + * If the parent we're using has a received_uuid set then use that as + * our clone source as that is what we will look for when doing a + * receive. + * + * This covers the case that we create a snapshot off of a received + * subvolume and then use that as the parent and try to receive on a + * different host. + */ + if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid)) + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, + clone_root->root->root_item.received_uuid); + else + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, + clone_root->root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, le64_to_cpu(clone_root->root->root_item.ctransid)); TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9e66f5e724db..cd7ef34d2dce 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -135,6 +135,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) * __btrfs_std_error decodes expected errors from the caller and * invokes the approciate error response. */ +__cold void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...) { @@ -247,18 +248,11 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, * We'll complete the cleanup in btrfs_end_transaction and * btrfs_commit_transaction. */ +__cold void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *function, unsigned int line, int errno) { - /* - * Report first abort since mount - */ - if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, - &root->fs_info->fs_state)) { - WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n", - errno); - } trans->aborted = errno; /* Nothing used. The other threads that have joined this * transaction may be able to continue. */ @@ -281,6 +275,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, * __btrfs_panic decodes unexpected, fatal errors from the caller, * issues an alert, and either panics or BUGs, depending on mount options. */ +__cold void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...) { @@ -841,33 +836,153 @@ out: return error; } -static struct dentry *get_default_root(struct super_block *sb, - u64 subvol_objectid) +static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, + u64 subvol_objectid) { - struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; - struct btrfs_root *new_root; - struct btrfs_dir_item *di; - struct btrfs_path *path; - struct btrfs_key location; - struct inode *inode; - u64 dir_id; - int new = 0; + struct btrfs_root *fs_root; + struct btrfs_root_ref *root_ref; + struct btrfs_inode_ref *inode_ref; + struct btrfs_key key; + struct btrfs_path *path = NULL; + char *name = NULL, *ptr; + u64 dirid; + int len; + int ret; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto err; + } + path->leave_spinning = 1; + + name = kmalloc(PATH_MAX, GFP_NOFS); + if (!name) { + ret = -ENOMEM; + goto err; + } + ptr = name + PATH_MAX - 1; + ptr[0] = '\0'; /* - * We have a specific subvol we want to mount, just setup location and - * go look up the root. + * Walk up the subvolume trees in the tree of tree roots by root + * backrefs until we hit the top-level subvolume. */ - if (subvol_objectid) { - location.objectid = subvol_objectid; - location.type = BTRFS_ROOT_ITEM_KEY; - location.offset = (u64)-1; - goto find_root; + while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { + key.objectid = subvol_objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = btrfs_previous_item(root, path, subvol_objectid, + BTRFS_ROOT_BACKREF_KEY); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = -ENOENT; + goto err; + } + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + subvol_objectid = key.offset; + + root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_root_ref); + len = btrfs_root_ref_name_len(path->nodes[0], root_ref); + ptr -= len + 1; + if (ptr < name) { + ret = -ENAMETOOLONG; + goto err; + } + read_extent_buffer(path->nodes[0], ptr + 1, + (unsigned long)(root_ref + 1), len); + ptr[0] = '/'; + dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); + btrfs_release_path(path); + + key.objectid = subvol_objectid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + fs_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(fs_root)) { + ret = PTR_ERR(fs_root); + goto err; + } + + /* + * Walk up the filesystem tree by inode refs until we hit the + * root directory. + */ + while (dirid != BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = dirid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = btrfs_previous_item(fs_root, path, dirid, + BTRFS_INODE_REF_KEY); + if (ret < 0) { + goto err; + } else if (ret > 0) { + ret = -ENOENT; + goto err; + } + } + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + dirid = key.offset; + + inode_ref = btrfs_item_ptr(path->nodes[0], + path->slots[0], + struct btrfs_inode_ref); + len = btrfs_inode_ref_name_len(path->nodes[0], + inode_ref); + ptr -= len + 1; + if (ptr < name) { + ret = -ENAMETOOLONG; + goto err; + } + read_extent_buffer(path->nodes[0], ptr + 1, + (unsigned long)(inode_ref + 1), len); + ptr[0] = '/'; + btrfs_release_path(path); + } } + btrfs_free_path(path); + if (ptr == name + PATH_MAX - 1) { + name[0] = '/'; + name[1] = '\0'; + } else { + memmove(name, ptr, name + PATH_MAX - ptr); + } + return name; + +err: + btrfs_free_path(path); + kfree(name); + return ERR_PTR(ret); +} + +static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) +{ + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_key location; + u64 dir_id; + path = btrfs_alloc_path(); if (!path) - return ERR_PTR(-ENOMEM); + return -ENOMEM; path->leave_spinning = 1; /* @@ -879,58 +994,23 @@ static struct dentry *get_default_root(struct super_block *sb, di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); if (IS_ERR(di)) { btrfs_free_path(path); - return ERR_CAST(di); + return PTR_ERR(di); } if (!di) { /* * Ok the default dir item isn't there. This is weird since * it's always been there, but don't freak out, just try and - * mount to root most subvolume. + * mount the top-level subvolume. */ btrfs_free_path(path); - dir_id = BTRFS_FIRST_FREE_OBJECTID; - new_root = fs_info->fs_root; - goto setup_root; + *objectid = BTRFS_FS_TREE_OBJECTID; + return 0; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); btrfs_free_path(path); - -find_root: - new_root = btrfs_read_fs_root_no_name(fs_info, &location); - if (IS_ERR(new_root)) - return ERR_CAST(new_root); - - if (!(sb->s_flags & MS_RDONLY)) { - int ret; - down_read(&fs_info->cleanup_work_sem); - ret = btrfs_orphan_cleanup(new_root); - up_read(&fs_info->cleanup_work_sem); - if (ret) - return ERR_PTR(ret); - } - - dir_id = btrfs_root_dirid(&new_root->root_item); -setup_root: - location.objectid = dir_id; - location.type = BTRFS_INODE_ITEM_KEY; - location.offset = 0; - - inode = btrfs_iget(sb, &location, new_root, &new); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - /* - * If we're just mounting the root most subvol put the inode and return - * a reference to the dentry. We will have already gotten a reference - * to the inode in btrfs_fill_super so we're good to go. - */ - if (!new && d_inode(sb->s_root) == inode) { - iput(inode); - return dget(sb->s_root); - } - - return d_obtain_root(inode); + *objectid = location.objectid; + return 0; } static int btrfs_fill_super(struct super_block *sb, @@ -1108,6 +1188,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",fatal_errors=panic"); if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) seq_printf(seq, ",commit=%d", info->commit_interval); + seq_printf(seq, ",subvolid=%llu", + BTRFS_I(d_inode(dentry))->root->root_key.objectid); + seq_puts(seq, ",subvol="); + seq_dentry(seq, dentry, " \t\n\\"); return 0; } @@ -1138,107 +1222,139 @@ static inline int is_subvolume_inode(struct inode *inode) } /* - * This will strip out the subvol=%s argument for an argument string and add - * subvolid=0 to make sure we get the actual tree root for path walking to the - * subvol we want. + * This will add subvolid=0 to the argument string while removing any subvol= + * and subvolid= arguments to make sure we get the top-level root for path + * walking to the subvol we want. */ static char *setup_root_args(char *args) { - unsigned len = strlen(args) + 2 + 1; - char *src, *dst, *buf; + char *buf, *dst, *sep; - /* - * We need the same args as before, but with this substitution: - * s!subvol=[^,]+!subvolid=0! - * - * Since the replacement string is up to 2 bytes longer than the - * original, allocate strlen(args) + 2 + 1 bytes. - */ + if (!args) + return kstrdup("subvolid=0", GFP_NOFS); - src = strstr(args, "subvol="); - /* This shouldn't happen, but just in case.. */ - if (!src) - return NULL; - - buf = dst = kmalloc(len, GFP_NOFS); + /* The worst case is that we add ",subvolid=0" to the end. */ + buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS); if (!buf) return NULL; - /* - * If the subvol= arg is not at the start of the string, - * copy whatever precedes it into buf. - */ - if (src != args) { - *src++ = '\0'; - strcpy(buf, args); - dst += strlen(args); + while (1) { + sep = strchrnul(args, ','); + if (!strstarts(args, "subvol=") && + !strstarts(args, "subvolid=")) { + memcpy(dst, args, sep - args); + dst += sep - args; + *dst++ = ','; + } + if (*sep) + args = sep + 1; + else + break; } - strcpy(dst, "subvolid=0"); - dst += strlen("subvolid=0"); - - /* - * If there is a "," after the original subvol=... string, - * copy that suffix into our buffer. Otherwise, we're done. - */ - src = strchr(src, ','); - if (src) - strcpy(dst, src); return buf; } -static struct dentry *mount_subvol(const char *subvol_name, int flags, - const char *device_name, char *data) +static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, + int flags, const char *device_name, + char *data) { struct dentry *root; - struct vfsmount *mnt; + struct vfsmount *mnt = NULL; char *newargs; + int ret; newargs = setup_root_args(data); - if (!newargs) - return ERR_PTR(-ENOMEM); - mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, - newargs); + if (!newargs) { + root = ERR_PTR(-ENOMEM); + goto out; + } - if (PTR_RET(mnt) == -EBUSY) { + mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); + if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { if (flags & MS_RDONLY) { - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name, - newargs); + mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, + device_name, newargs); } else { - int r; - mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, - newargs); + mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, + device_name, newargs); if (IS_ERR(mnt)) { - kfree(newargs); - return ERR_CAST(mnt); + root = ERR_CAST(mnt); + mnt = NULL; + goto out; } - r = btrfs_remount(mnt->mnt_sb, &flags, NULL); - if (r < 0) { - /* FIXME: release vfsmount mnt ??*/ - kfree(newargs); - return ERR_PTR(r); + down_write(&mnt->mnt_sb->s_umount); + ret = btrfs_remount(mnt->mnt_sb, &flags, NULL); + up_write(&mnt->mnt_sb->s_umount); + if (ret < 0) { + root = ERR_PTR(ret); + goto out; } } } + if (IS_ERR(mnt)) { + root = ERR_CAST(mnt); + mnt = NULL; + goto out; + } - kfree(newargs); + if (!subvol_name) { + if (!subvol_objectid) { + ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), + &subvol_objectid); + if (ret) { + root = ERR_PTR(ret); + goto out; + } + } + subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb), + subvol_objectid); + if (IS_ERR(subvol_name)) { + root = ERR_CAST(subvol_name); + subvol_name = NULL; + goto out; + } - if (IS_ERR(mnt)) - return ERR_CAST(mnt); + } root = mount_subtree(mnt, subvol_name); + /* mount_subtree() drops our reference on the vfsmount. */ + mnt = NULL; - if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) { + if (!IS_ERR(root)) { struct super_block *s = root->d_sb; - dput(root); - root = ERR_PTR(-EINVAL); - deactivate_locked_super(s); - printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n", - subvol_name); + struct inode *root_inode = d_inode(root); + u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid; + + ret = 0; + if (!is_subvolume_inode(root_inode)) { + pr_err("BTRFS: '%s' is not a valid subvolume\n", + subvol_name); + ret = -EINVAL; + } + if (subvol_objectid && root_objectid != subvol_objectid) { + /* + * This will also catch a race condition where a + * subvolume which was passed by ID is renamed and + * another subvolume is renamed over the old location. + */ + pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n", + subvol_name, subvol_objectid); + ret = -EINVAL; + } + if (ret) { + dput(root); + root = ERR_PTR(ret); + deactivate_locked_super(s); + } } +out: + mntput(mnt); + kfree(newargs); + kfree(subvol_name); return root; } @@ -1303,7 +1419,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, { struct block_device *bdev = NULL; struct super_block *s; - struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; struct security_mnt_opts new_sec_opts; @@ -1323,10 +1438,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, return ERR_PTR(error); } - if (subvol_name) { - root = mount_subvol(subvol_name, flags, device_name, data); - kfree(subvol_name); - return root; + if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { + /* mount_subvol() will free subvol_name. */ + return mount_subvol(subvol_name, subvol_objectid, flags, + device_name, data); } security_init_mnt_opts(&new_sec_opts); @@ -1392,23 +1507,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); } - - root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); - if (IS_ERR(root)) { + if (error) { deactivate_locked_super(s); - error = PTR_ERR(root); goto error_sec_opts; } fs_info = btrfs_sb(s); error = setup_security_options(fs_info, s, &new_sec_opts); if (error) { - dput(root); deactivate_locked_super(s); goto error_sec_opts; } - return root; + return dget(s->s_root); error_close_devices: btrfs_close_devices(fs_devices); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index e8a4c86d274d..603b0cc2b9bb 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -33,6 +33,7 @@ #include "volumes.h" static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj); +static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj); static u64 get_features(struct btrfs_fs_info *fs_info, enum btrfs_feature_set set) @@ -428,7 +429,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); -static struct attribute *btrfs_attrs[] = { +static const struct attribute *btrfs_attrs[] = { BTRFS_ATTR_PTR(label), BTRFS_ATTR_PTR(nodesize), BTRFS_ATTR_PTR(sectorsize), @@ -438,21 +439,29 @@ static struct attribute *btrfs_attrs[] = { static void btrfs_release_super_kobj(struct kobject *kobj) { - struct btrfs_fs_info *fs_info = to_fs_info(kobj); - complete(&fs_info->kobj_unregister); + struct btrfs_fs_devices *fs_devs = to_fs_devs(kobj); + + memset(&fs_devs->super_kobj, 0, sizeof(struct kobject)); + complete(&fs_devs->kobj_unregister); } static struct kobj_type btrfs_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = btrfs_release_super_kobj, - .default_attrs = btrfs_attrs, }; +static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj) +{ + if (kobj->ktype != &btrfs_ktype) + return NULL; + return container_of(kobj, struct btrfs_fs_devices, super_kobj); +} + static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj) { if (kobj->ktype != &btrfs_ktype) return NULL; - return container_of(kobj, struct btrfs_fs_info, super_kobj); + return to_fs_devs(kobj)->fs_info; } #define NUM_FEATURE_BITS 64 @@ -493,12 +502,12 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add) attrs[0] = &fa->kobj_attr.attr; if (add) { int ret; - ret = sysfs_merge_group(&fs_info->super_kobj, + ret = sysfs_merge_group(&fs_info->fs_devices->super_kobj, &agroup); if (ret) return ret; } else - sysfs_unmerge_group(&fs_info->super_kobj, + sysfs_unmerge_group(&fs_info->fs_devices->super_kobj, &agroup); } @@ -506,25 +515,49 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add) return 0; } -static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info) +static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs) +{ + if (fs_devs->device_dir_kobj) { + kobject_del(fs_devs->device_dir_kobj); + kobject_put(fs_devs->device_dir_kobj); + fs_devs->device_dir_kobj = NULL; + } + + if (fs_devs->super_kobj.state_initialized) { + kobject_del(&fs_devs->super_kobj); + kobject_put(&fs_devs->super_kobj); + wait_for_completion(&fs_devs->kobj_unregister); + } +} + +/* when fs_devs is NULL it will remove all fsid kobject */ +void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs) { - kobject_del(&fs_info->super_kobj); - kobject_put(&fs_info->super_kobj); - wait_for_completion(&fs_info->kobj_unregister); + struct list_head *fs_uuids = btrfs_get_fs_uuids(); + + if (fs_devs) { + __btrfs_sysfs_remove_fsid(fs_devs); + return; + } + + list_for_each_entry(fs_devs, fs_uuids, list) { + __btrfs_sysfs_remove_fsid(fs_devs); + } } void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info) { + btrfs_reset_fs_info_ptr(fs_info); + if (fs_info->space_info_kobj) { sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs); kobject_del(fs_info->space_info_kobj); kobject_put(fs_info->space_info_kobj); } - kobject_del(fs_info->device_dir_kobj); - kobject_put(fs_info->device_dir_kobj); addrm_unknown_feature_attrs(fs_info, false); - sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group); - __btrfs_sysfs_remove_one(fs_info); + sysfs_remove_group(&fs_info->fs_devices->super_kobj, &btrfs_feature_attr_group); + sysfs_remove_files(&fs_info->fs_devices->super_kobj, btrfs_attrs); + btrfs_kobj_rm_device(fs_info->fs_devices, NULL); } const char * const btrfs_feature_set_names[3] = { @@ -602,40 +635,60 @@ static void init_feature_attrs(void) } } -int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, +/* when one_device is NULL, it removes all device links */ + +int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices, struct btrfs_device *one_device) { struct hd_struct *disk; struct kobject *disk_kobj; - if (!fs_info->device_dir_kobj) + if (!fs_devices->device_dir_kobj) return -EINVAL; if (one_device && one_device->bdev) { disk = one_device->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(fs_info->device_dir_kobj, + sysfs_remove_link(fs_devices->device_dir_kobj, + disk_kobj->name); + } + + if (one_device) + return 0; + + list_for_each_entry(one_device, + &fs_devices->devices, dev_list) { + if (!one_device->bdev) + continue; + disk = one_device->bdev->bd_part; + disk_kobj = &part_to_dev(disk)->kobj; + + sysfs_remove_link(fs_devices->device_dir_kobj, disk_kobj->name); } return 0; } -int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, - struct btrfs_device *one_device) +int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs) { - int error = 0; - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - struct btrfs_device *dev; - - if (!fs_info->device_dir_kobj) - fs_info->device_dir_kobj = kobject_create_and_add("devices", - &fs_info->super_kobj); + if (!fs_devs->device_dir_kobj) + fs_devs->device_dir_kobj = kobject_create_and_add("devices", + &fs_devs->super_kobj); - if (!fs_info->device_dir_kobj) + if (!fs_devs->device_dir_kobj) return -ENOMEM; + return 0; +} + +int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices, + struct btrfs_device *one_device) +{ + int error = 0; + struct btrfs_device *dev; + list_for_each_entry(dev, &fs_devices->devices, dev_list) { struct hd_struct *disk; struct kobject *disk_kobj; @@ -649,7 +702,7 @@ int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, disk = dev->bdev->bd_part; disk_kobj = &part_to_dev(disk)->kobj; - error = sysfs_create_link(fs_info->device_dir_kobj, + error = sysfs_create_link(fs_devices->device_dir_kobj, disk_kobj, disk_kobj->name); if (error) break; @@ -667,34 +720,51 @@ static struct dentry *btrfs_debugfs_root_dentry; /* Debugging tunables and exported data */ u64 btrfs_debugfs_test; +/* + * Can be called by the device discovery thread. + * And parent can be specified for seed device + */ +int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, + struct kobject *parent) +{ + int error; + + init_completion(&fs_devs->kobj_unregister); + fs_devs->super_kobj.kset = btrfs_kset; + error = kobject_init_and_add(&fs_devs->super_kobj, + &btrfs_ktype, parent, "%pU", fs_devs->fsid); + return error; +} + int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) { int error; + struct btrfs_fs_devices *fs_devs = fs_info->fs_devices; + struct kobject *super_kobj = &fs_devs->super_kobj; + + btrfs_set_fs_info_ptr(fs_info); - init_completion(&fs_info->kobj_unregister); - fs_info->super_kobj.kset = btrfs_kset; - error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL, - "%pU", fs_info->fsid); + error = btrfs_kobj_add_device(fs_devs, NULL); if (error) return error; - error = sysfs_create_group(&fs_info->super_kobj, - &btrfs_feature_attr_group); + error = sysfs_create_files(super_kobj, btrfs_attrs); if (error) { - __btrfs_sysfs_remove_one(fs_info); + btrfs_kobj_rm_device(fs_devs, NULL); return error; } - error = addrm_unknown_feature_attrs(fs_info, true); + error = sysfs_create_group(super_kobj, + &btrfs_feature_attr_group); if (error) goto failure; - error = btrfs_kobj_add_device(fs_info, NULL); + error = addrm_unknown_feature_attrs(fs_info, true); if (error) goto failure; fs_info->space_info_kobj = kobject_create_and_add("allocation", - &fs_info->super_kobj); + super_kobj); if (!fs_info->space_info_kobj) { error = -ENOMEM; goto failure; diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 3a4bbed723fd..6392527bcc15 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -82,8 +82,12 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); extern const char * const btrfs_feature_set_names[3]; extern struct kobj_type space_info_ktype; extern struct kobj_type btrfs_raid_ktype; -int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info, +int btrfs_kobj_add_device(struct btrfs_fs_devices *fs_devices, struct btrfs_device *one_device); -int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, +int btrfs_kobj_rm_device(struct btrfs_fs_devices *fs_devices, struct btrfs_device *one_device); +int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, + struct kobject *parent); +int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); +void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); #endif /* _BTRFS_SYSFS_H_ */ diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index c32a7ba76bca..846d277b1901 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -21,6 +21,7 @@ #include "../transaction.h" #include "../disk-io.h" #include "../qgroup.h" +#include "../backref.h" static void init_dummy_trans(struct btrfs_trans_handle *trans) { @@ -227,6 +228,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; + struct ulist *old_roots = NULL; + struct ulist *new_roots = NULL; int ret; init_dummy_trans(&trans); @@ -238,10 +241,15 @@ static int test_no_shared_qgroup(struct btrfs_root *root) return ret; } - ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, - BTRFS_QGROUP_OPER_ADD_EXCL, 0); + /* + * Since the test trans doesn't havee the complicated delayed refs, + * we can only call btrfs_qgroup_account_extent() directly to test + * quota. + */ + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); if (ret) { - test_msg("Couldn't add space to a qgroup %d\n", ret); + ulist_free(old_roots); + test_msg("Couldn't find old roots: %d\n", ret); return ret; } @@ -249,9 +257,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root) if (ret) return ret; - ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); + if (ret) { + ulist_free(old_roots); + ulist_free(new_roots); + test_msg("Couldn't find old roots: %d\n", ret); + return ret; + } + + ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, + old_roots, new_roots); if (ret) { - test_msg("Delayed qgroup accounting failed %d\n", ret); + test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } @@ -259,21 +276,32 @@ static int test_no_shared_qgroup(struct btrfs_root *root) test_msg("Qgroup counts didn't match expected values\n"); return -EINVAL; } + old_roots = NULL; + new_roots = NULL; + + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + if (ret) { + ulist_free(old_roots); + test_msg("Couldn't find old roots: %d\n", ret); + return ret; + } ret = remove_extent_item(root, 4096, 4096); if (ret) return -EINVAL; - ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, - BTRFS_QGROUP_OPER_SUB_EXCL, 0); + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); if (ret) { - test_msg("Couldn't remove space from the qgroup %d\n", ret); - return -EINVAL; + ulist_free(old_roots); + ulist_free(new_roots); + test_msg("Couldn't find old roots: %d\n", ret); + return ret; } - ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); + ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, + old_roots, new_roots); if (ret) { - test_msg("Qgroup accounting failed %d\n", ret); + test_msg("Couldn't account space for a qgroup %d\n", ret); return -EINVAL; } @@ -294,6 +322,8 @@ static int test_multiple_refs(struct btrfs_root *root) { struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; + struct ulist *old_roots = NULL; + struct ulist *new_roots = NULL; int ret; init_dummy_trans(&trans); @@ -307,20 +337,29 @@ static int test_multiple_refs(struct btrfs_root *root) return ret; } + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + if (ret) { + ulist_free(old_roots); + test_msg("Couldn't find old roots: %d\n", ret); + return ret; + } + ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5); if (ret) return ret; - ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096, - BTRFS_QGROUP_OPER_ADD_EXCL, 0); + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); if (ret) { - test_msg("Couldn't add space to a qgroup %d\n", ret); + ulist_free(old_roots); + ulist_free(new_roots); + test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); + ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, + old_roots, new_roots); if (ret) { - test_msg("Delayed qgroup accounting failed %d\n", ret); + test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } @@ -329,20 +368,29 @@ static int test_multiple_refs(struct btrfs_root *root) return -EINVAL; } + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + if (ret) { + ulist_free(old_roots); + test_msg("Couldn't find old roots: %d\n", ret); + return ret; + } + ret = add_tree_ref(root, 4096, 4096, 0, 256); if (ret) return ret; - ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096, - BTRFS_QGROUP_OPER_ADD_SHARED, 0); + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); if (ret) { - test_msg("Qgroup record ref failed %d\n", ret); + ulist_free(old_roots); + ulist_free(new_roots); + test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); + ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, + old_roots, new_roots); if (ret) { - test_msg("Qgroup accounting failed %d\n", ret); + test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } @@ -356,20 +404,29 @@ static int test_multiple_refs(struct btrfs_root *root) return -EINVAL; } + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots); + if (ret) { + ulist_free(old_roots); + test_msg("Couldn't find old roots: %d\n", ret); + return ret; + } + ret = remove_extent_ref(root, 4096, 4096, 0, 256); if (ret) return ret; - ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096, - BTRFS_QGROUP_OPER_SUB_SHARED, 0); + ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots); if (ret) { - test_msg("Qgroup record ref failed %d\n", ret); + ulist_free(old_roots); + ulist_free(new_roots); + test_msg("Couldn't find old roots: %d\n", ret); return ret; } - ret = btrfs_delayed_qgroup_accounting(&trans, fs_info); + ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096, + old_roots, new_roots); if (ret) { - test_msg("Qgroup accounting failed %d\n", ret); + test_msg("Couldn't account space for a qgroup %d\n", ret); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5628e25250c0..c0f18e7266b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -225,12 +225,14 @@ loop: cur_trans->dirty_bg_run = 0; cur_trans->delayed_refs.href_root = RB_ROOT; + cur_trans->delayed_refs.dirty_extent_root = RB_ROOT; atomic_set(&cur_trans->delayed_refs.num_entries, 0); cur_trans->delayed_refs.num_heads_ready = 0; cur_trans->delayed_refs.pending_csums = 0; cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; cur_trans->delayed_refs.run_delayed_start = 0; + cur_trans->delayed_refs.qgroup_to_skip = 0; /* * although the tree mod log is per file system and not per transaction, @@ -509,6 +511,7 @@ again: h->transaction = cur_trans; h->blocks_used = 0; h->bytes_reserved = 0; + h->chunk_bytes_reserved = 0; h->root = root; h->delayed_ref_updates = 0; h->use_count = 1; @@ -792,6 +795,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (!list_empty(&trans->new_bgs)) btrfs_create_pending_block_groups(trans, root); + btrfs_trans_release_chunk_metadata(trans); + if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root) && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) { @@ -1290,6 +1295,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (pending->error) goto no_free_objectid; + /* + * Make qgroup to skip current new snapshot's qgroupid, as it is + * accounted by later btrfs_qgroup_inherit(). + */ + btrfs_set_skip_qgroup(trans, objectid); + btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); if (to_reserve > 0) { @@ -1298,7 +1309,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, to_reserve, BTRFS_RESERVE_NO_FLUSH); if (pending->error) - goto no_free_objectid; + goto clear_skip_qgroup; } key.objectid = objectid; @@ -1396,25 +1407,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, ret); goto fail; } - - /* - * We need to flush delayed refs in order to make sure all of our quota - * operations have been done before we call btrfs_qgroup_inherit. - */ - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto fail; - } - - ret = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto fail; - } - /* see comments in should_cow_block() */ set_bit(BTRFS_ROOT_FORCE_COW, &root->state); smp_wmb(); @@ -1497,11 +1489,37 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } } + + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + + /* + * account qgroup counters before qgroup_inherit() + */ + ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); + if (ret) + goto fail; + ret = btrfs_qgroup_account_extents(trans, fs_info); + if (ret) + goto fail; + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + fail: pending->error = ret; dir_item_existed: trans->block_rsv = rsv; trans->bytes_reserved = 0; +clear_skip_qgroup: + btrfs_clear_skip_qgroup(trans); no_free_objectid: kfree(new_root_item); root_item_alloc_fail: @@ -1963,6 +1981,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, goto scrub_continue; } + /* Reocrd old roots for later qgroup accounting */ + ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info); + if (ret) { + mutex_unlock(&root->fs_info->reloc_mutex); + goto scrub_continue; + } + /* * make sure none of the code above managed to slip in a * delayed item @@ -2004,6 +2029,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ btrfs_free_log_root_tree(trans, root->fs_info); + /* + * Since fs roots are all committed, we can get a quite accurate + * new_roots. So let's do quota accounting. + */ + ret = btrfs_qgroup_account_extents(trans, root->fs_info); + if (ret < 0) { + mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->fs_info->reloc_mutex); + goto scrub_continue; + } + ret = commit_cowonly_roots(trans, root); if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); @@ -2054,6 +2090,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); + btrfs_trans_release_chunk_metadata(trans); + spin_lock(&root->fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; root->fs_info->running_transaction = NULL; @@ -2123,6 +2161,7 @@ scrub_continue: btrfs_scrub_continue(root); cleanup_transaction: btrfs_trans_release_metadata(trans, root); + btrfs_trans_release_chunk_metadata(trans); trans->block_rsv = NULL; if (trans->qgroup_reserved) { btrfs_qgroup_free(root, trans->qgroup_reserved); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0b24755596ba..eb09c2067fa8 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -102,6 +102,7 @@ struct btrfs_transaction { struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; + u64 chunk_bytes_reserved; u64 qgroup_reserved; unsigned long use_count; unsigned long blocks_reserved; @@ -153,6 +154,29 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, spin_unlock(&BTRFS_I(inode)->lock); } +/* + * Make qgroup codes to skip given qgroupid, means the old/new_roots for + * qgroup won't contain the qgroupid in it. + */ +static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans, + u64 qgroupid) +{ + struct btrfs_delayed_ref_root *delayed_refs; + + delayed_refs = &trans->transaction->delayed_refs; + WARN_ON(delayed_refs->qgroup_to_skip); + delayed_refs->qgroup_to_skip = qgroupid; +} + +static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans) +{ + struct btrfs_delayed_ref_root *delayed_refs; + + delayed_refs = &trans->transaction->delayed_refs; + WARN_ON(!delayed_refs->qgroup_to_skip); + delayed_refs->qgroup_to_skip = 0; +} + int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index a63719cc9578..a4b9c8b2d35a 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -52,9 +52,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) goto out; - if (btrfs_test_opt(root, SSD)) - goto out; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d04968374e9d..1ce80c1c4eb6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3881,12 +3881,6 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans, &ordered->flags)) continue; - if (ordered->csum_bytes_left) { - btrfs_start_ordered_extent(inode, ordered, 0); - wait_event(ordered->wait, - ordered->csum_bytes_left == 0); - } - list_for_each_entry(sum, &ordered->list, list) { ret = btrfs_csum_file_blocks(trans, log, sum); if (ret) diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 840a38b2778a..91feb2bdefee 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -132,6 +132,15 @@ static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val) return NULL; } +static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node) +{ + rb_erase(&node->rb_node, &ulist->root); + list_del(&node->list); + kfree(node); + BUG_ON(ulist->nnodes == 0); + ulist->nnodes--; +} + static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins) { struct rb_node **p = &ulist->root.rb_node; @@ -197,9 +206,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, node->val = val; node->aux = aux; -#ifdef CONFIG_BTRFS_DEBUG - node->seqnum = ulist->nnodes; -#endif ret = ulist_rbtree_insert(ulist, node); ASSERT(!ret); @@ -209,6 +215,33 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, return 1; } +/* + * ulist_del - delete one node from ulist + * @ulist: ulist to remove node from + * @val: value to delete + * @aux: aux to delete + * + * The deletion will only be done when *BOTH* val and aux matches. + * Return 0 for successful delete. + * Return > 0 for not found. + */ +int ulist_del(struct ulist *ulist, u64 val, u64 aux) +{ + struct ulist_node *node; + + node = ulist_rbtree_search(ulist, val); + /* Not found */ + if (!node) + return 1; + + if (node->aux != aux) + return 1; + + /* Found and delete */ + ulist_rbtree_erase(ulist, node); + return 0; +} + /** * ulist_next - iterate ulist * @ulist: ulist to iterate @@ -237,15 +270,7 @@ struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) uiter->cur_list = uiter->cur_list->next; } else { uiter->cur_list = ulist->nodes.next; -#ifdef CONFIG_BTRFS_DEBUG - uiter->i = 0; -#endif } node = list_entry(uiter->cur_list, struct ulist_node, list); -#ifdef CONFIG_BTRFS_DEBUG - ASSERT(node->seqnum == uiter->i); - ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes); - uiter->i++; -#endif return node; } diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 4c29db604bbe..a01a2c45825f 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -57,6 +57,7 @@ void ulist_free(struct ulist *ulist); int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, u64 *old_aux, gfp_t gfp_mask); +int ulist_del(struct ulist *ulist, u64 val, u64 aux); /* just like ulist_add_merge() but take a pointer for the aux data */ static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53af23f2c087..4b438b4c8c91 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -52,6 +52,10 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); +struct list_head *btrfs_get_fs_uuids(void) +{ + return &fs_uuids; +} static struct btrfs_fs_devices *__alloc_fs_devices(void) { @@ -441,6 +445,61 @@ static void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } + +void btrfs_free_stale_device(struct btrfs_device *cur_dev) +{ + struct btrfs_fs_devices *fs_devs; + struct btrfs_device *dev; + + if (!cur_dev->name) + return; + + list_for_each_entry(fs_devs, &fs_uuids, list) { + int del = 1; + + if (fs_devs->opened) + continue; + if (fs_devs->seeding) + continue; + + list_for_each_entry(dev, &fs_devs->devices, dev_list) { + + if (dev == cur_dev) + continue; + if (!dev->name) + continue; + + /* + * Todo: This won't be enough. What if the same device + * comes back (with new uuid and) with its mapper path? + * But for now, this does help as mostly an admin will + * either use mapper or non mapper path throughout. + */ + rcu_read_lock(); + del = strcmp(rcu_str_deref(dev->name), + rcu_str_deref(cur_dev->name)); + rcu_read_unlock(); + if (!del) + break; + } + + if (!del) { + /* delete the stale device */ + if (fs_devs->num_devices == 1) { + btrfs_sysfs_remove_fsid(fs_devs); + list_del(&fs_devs->list); + free_fs_devices(fs_devs); + } else { + fs_devs->num_devices--; + list_del(&dev->dev_list); + rcu_string_free(dev->name); + kfree(dev); + } + break; + } + } +} + /* * Add new device to list of registered devices * @@ -556,6 +615,12 @@ static noinline int device_list_add(const char *path, if (!fs_devices->opened) device->generation = found_transid; + /* + * if there is new btrfs on an already registered device, + * then remove the stale device entry. + */ + btrfs_free_stale_device(device); + *fs_devices_ret = fs_devices; return ret; @@ -693,13 +758,13 @@ static void free_device(struct rcu_head *head) static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct btrfs_device *device; + struct btrfs_device *device, *tmp; if (--fs_devices->opened > 0) return 0; mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) { + list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) { struct btrfs_device *new_device; struct rcu_string *name; @@ -1067,15 +1132,31 @@ again: map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { + u64 end; + if (map->stripes[i].dev != device) continue; if (map->stripes[i].physical >= physical_start + len || map->stripes[i].physical + em->orig_block_len <= physical_start) continue; - *start = map->stripes[i].physical + - em->orig_block_len; - ret = 1; + /* + * Make sure that while processing the pinned list we do + * not override our *start with a lower value, because + * we can have pinned chunks that fall within this + * device hole and that have lower physical addresses + * than the pending chunks we processed before. If we + * do not take this special care we can end up getting + * 2 pending chunks that start at the same physical + * device offsets because the end offset of a pinned + * chunk can be equal to the start offset of some + * pending chunk. + */ + end = map->stripes[i].physical + em->orig_block_len; + if (end > *start) { + *start = end; + ret = 1; + } } } if (search_list == &trans->transaction->pending_chunks) { @@ -1706,7 +1787,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev) { device->fs_devices->open_devices--; /* remove sysfs entry */ - btrfs_kobj_rm_device(root->fs_info, device); + btrfs_kobj_rm_device(root->fs_info->fs_devices, device); } call_rcu(&device->rcu, free_device); @@ -1875,6 +1956,9 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, mutex_lock(&uuid_mutex); WARN_ON(!tgtdev); mutex_lock(&fs_info->fs_devices->device_list_mutex); + + btrfs_kobj_rm_device(fs_info->fs_devices, tgtdev); + if (tgtdev->bdev) { btrfs_scratch_superblock(tgtdev); fs_info->fs_devices->open_devices--; @@ -2211,7 +2295,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) tmp + 1); /* add sysfs device entry */ - btrfs_kobj_add_device(root->fs_info, device); + btrfs_kobj_add_device(root->fs_info->fs_devices, device); /* * we've got more storage, clear any full flags on the space @@ -2252,8 +2336,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) */ snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", root->fs_info->fsid); - if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) - goto error_trans; + if (kobject_rename(&root->fs_info->fs_devices->super_kobj, + fsid_buf)) + pr_warn("BTRFS: sysfs: failed to create fsid for sprout\n"); } root->fs_info->num_tolerated_disk_barrier_failures = @@ -2289,7 +2374,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) error_trans: btrfs_end_transaction(trans, root); rcu_string_free(device->name); - btrfs_kobj_rm_device(root->fs_info, device); + btrfs_kobj_rm_device(root->fs_info->fs_devices, device); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -2609,6 +2694,9 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, return -EINVAL; } map = (struct map_lookup *)em->bdev; + lock_chunks(root->fs_info->chunk_root); + check_system_chunk(trans, extent_root, map->type); + unlock_chunks(root->fs_info->chunk_root); for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; @@ -3908,9 +3996,9 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) uuid_root = btrfs_create_tree(trans, fs_info, BTRFS_UUID_TREE_OBJECTID); if (IS_ERR(uuid_root)) { - btrfs_abort_transaction(trans, tree_root, - PTR_ERR(uuid_root)); - return PTR_ERR(uuid_root); + ret = PTR_ERR(uuid_root); + btrfs_abort_transaction(trans, tree_root, ret); + return ret; } fs_info->uuid_root = uuid_root; @@ -3965,6 +4053,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) int slot; int failed = 0; bool retried = false; + bool checked_pending_chunks = false; struct extent_buffer *l; struct btrfs_key key; struct btrfs_super_block *super_copy = root->fs_info->super_copy; @@ -4045,15 +4134,6 @@ again: goto again; } else if (failed && retried) { ret = -ENOSPC; - lock_chunks(root); - - btrfs_device_set_total_bytes(device, old_size); - if (device->writeable) - device->fs_devices->total_rw_bytes += diff; - spin_lock(&root->fs_info->free_chunk_lock); - root->fs_info->free_chunk_space += diff; - spin_unlock(&root->fs_info->free_chunk_lock); - unlock_chunks(root); goto done; } @@ -4065,6 +4145,35 @@ again: } lock_chunks(root); + + /* + * We checked in the above loop all device extents that were already in + * the device tree. However before we have updated the device's + * total_bytes to the new size, we might have had chunk allocations that + * have not complete yet (new block groups attached to transaction + * handles), and therefore their device extents were not yet in the + * device tree and we missed them in the loop above. So if we have any + * pending chunk using a device extent that overlaps the device range + * that we can not use anymore, commit the current transaction and + * repeat the search on the device tree - this way we guarantee we will + * not have chunks using device extents that end beyond 'new_size'. + */ + if (!checked_pending_chunks) { + u64 start = new_size; + u64 len = old_size - new_size; + + if (contains_pending_extent(trans, device, &start, len)) { + unlock_chunks(root); + checked_pending_chunks = true; + failed = 0; + retried = false; + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto done; + goto again; + } + } + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->resized_list)) list_add_tail(&device->resized_list, @@ -4079,6 +4188,16 @@ again: btrfs_end_transaction(trans, root); done: btrfs_free_path(path); + if (ret) { + lock_chunks(root); + btrfs_device_set_total_bytes(device, old_size); + if (device->writeable) + device->fs_devices->total_rw_bytes += diff; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += diff; + spin_unlock(&root->fs_info->free_chunk_lock); + unlock_chunks(root); + } return ret; } @@ -6072,6 +6191,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, free_extent_map(em); return -EIO; } + btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing", + devid, uuid); } map->stripes[i].dev->in_fs_metadata = 1; } @@ -6191,10 +6312,11 @@ static int read_one_dev(struct btrfs_root *root, if (!btrfs_test_opt(root, DEGRADED)) return -EIO; - btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, fs_devices, devid, dev_uuid); if (!device) return -ENOMEM; + btrfs_warn(root->fs_info, "devid %llu uuid %pU missing", + devid, dev_uuid); } else { if (!device->bdev && !btrfs_test_opt(root, DEGRADED)) return -EIO; @@ -6722,3 +6844,21 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, } unlock_chunks(root); } + +void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + while (fs_devices) { + fs_devices->fs_info = fs_info; + fs_devices = fs_devices->seed; + } +} + +void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) +{ + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + while (fs_devices) { + fs_devices->fs_info = NULL; + fs_devices = fs_devices->seed; + } +} diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index cedae0356558..95842a909e7f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -253,6 +253,12 @@ struct btrfs_fs_devices { * nonrot flag set */ int rotating; + + struct btrfs_fs_info *fs_info; + /* sysfs kobjects */ + struct kobject super_kobj; + struct kobject *device_dir_kobj; + struct completion kobj_unregister; }; #define BTRFS_BIO_INLINE_CSUM_SIZE 64 @@ -535,5 +541,8 @@ static inline void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +struct list_head *btrfs_get_fs_uuids(void); +void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); +void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); #endif diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index a2172f3f69e3..e7b478b49985 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -192,6 +192,15 @@ config CIFS_SMB2 options are also slightly simpler (compared to CIFS) due to protocol improvements. +config CIFS_SMB311 + bool "SMB3.1.1 network file system support (Experimental)" + depends on CIFS_SMB2 && INET + + help + This enables experimental support for the newest, SMB3.1.1, dialect. + This dialect includes improved security negotiation features. + If unsure, say N + config CIFS_FSCACHE bool "Provide CIFS client caching support" depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 22b289a3b1c4..b406a32deb1f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -171,6 +171,10 @@ enum smb_version { Smb_21, Smb_30, Smb_302, +#ifdef CONFIG_CIFS_SMB311 + Smb_311, +#endif /* SMB311 */ + Smb_version_err }; struct mid_q_entry; @@ -368,6 +372,8 @@ struct smb_version_operations { void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *); int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *); + int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, + struct cifsFileInfo *src_file); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); @@ -386,6 +392,9 @@ struct smb_version_operations { int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); + int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, + struct cifsFileInfo *target_file, u64 src_off, u64 len, + u64 dest_off); int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, const unsigned char *, const unsigned char *, char *, @@ -1617,4 +1626,8 @@ extern struct smb_version_values smb30_values; #define SMB302_VERSION_STRING "3.02" /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; +#define SMB311_VERSION_STRING "3.1.1" +#define ALT_SMB311_VERSION_STRING "3.11" +extern struct smb_version_operations smb311_operations; +extern struct smb_version_values smb311_values; #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 5f9822ac0245..47b030da0781 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2255,6 +2255,8 @@ typedef struct { /* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */ +#define FILE_SUPPORTS_SPARSE_VDL 0x10000000 /* faster nonsparse extend */ +#define FILE_SUPPORTS_BLOCK_REFCOUNTING 0x08000000 /* allow ioctl dup extents */ #define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000 #define FILE_SUPPORTS_USN_JOURNAL 0x02000000 #define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000 @@ -2310,6 +2312,16 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */ char FileName[1]; } __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */ +typedef struct { + __le64 AllocationSize; + __le64 EndOfFile; /* size ie offset to first free byte in file */ + __le32 NumberOfLinks; /* hard links */ + __u8 DeletePending; + __u8 Directory; + __u16 Pad; +} __attribute__((packed)) FILE_STANDARD_INFO; /* level 0x102 QPathInfo */ + + /* defines for enumerating possible values of the Unix type field below */ #define UNIX_FILE 0 #define UNIX_DIR 1 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f26ffbfc64d8..672ef35c9f73 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -625,9 +625,8 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) server->negflavor = CIFS_NEGFLAVOR_UNENCAP; memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey, CIFS_CRYPTO_KEY_SIZE); - } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || - server->capabilities & CAP_EXTENDED_SECURITY) && - (pSMBr->EncryptionKeyLength == 0)) { + } else if (pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC || + server->capabilities & CAP_EXTENDED_SECURITY) { server->negflavor = CIFS_NEGFLAVOR_EXTENDED; rc = decode_ext_sec_blob(ses, pSMBr); } else if (server->sec_mode & SECMODE_PW_ENCRYPT) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8383d5ea4202..773f4dc77630 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -280,6 +280,11 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_21, SMB21_VERSION_STRING }, { Smb_30, SMB30_VERSION_STRING }, { Smb_302, SMB302_VERSION_STRING }, +#ifdef CONFIG_CIFS_SMB311 + { Smb_311, SMB311_VERSION_STRING }, + { Smb_311, ALT_SMB311_VERSION_STRING }, +#endif /* SMB311 */ + { Smb_version_err, NULL } }; static int ip_connect(struct TCP_Server_Info *server); @@ -1133,6 +1138,12 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->ops = &smb30_operations; /* currently identical with 3.0 */ vol->vals = &smb302_values; break; +#ifdef CONFIG_CIFS_SMB311 + case Smb_311: + vol->ops = &smb311_operations; + vol->vals = &smb311_values; + break; +#endif /* SMB311 */ #endif default: cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); @@ -3461,6 +3472,8 @@ try_mount_again: else if (ses) cifs_put_smb_ses(ses); + cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; + free_xid(xid); } #endif diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 8b7898b7670f..49b8b6e41a18 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -31,12 +31,15 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" +#include <linux/btrfs.h> #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) +#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4) static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, - unsigned long srcfd, u64 off, u64 len, u64 destoff) + unsigned long srcfd, u64 off, u64 len, u64 destoff, + bool dup_extents) { int rc; struct cifsFileInfo *smb_file_target = dst_file->private_data; @@ -109,9 +112,14 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, truncate_inode_pages_range(&target_inode->i_data, destoff, PAGE_CACHE_ALIGN(destoff + len)-1); - if (target_tcon->ses->server->ops->clone_range) + if (dup_extents && target_tcon->ses->server->ops->duplicate_extents) + rc = target_tcon->ses->server->ops->duplicate_extents(xid, + smb_file_src, smb_file_target, off, len, destoff); + else if (!dup_extents && target_tcon->ses->server->ops->clone_range) rc = target_tcon->ses->server->ops->clone_range(xid, smb_file_src, smb_file_target, off, len, destoff); + else + rc = -EOPNOTSUPP; /* force revalidate of size and timestamps of target file now that target is updated on the server */ @@ -205,7 +213,20 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) } break; case CIFS_IOC_COPYCHUNK_FILE: - rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); + rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false); + break; + case BTRFS_IOC_CLONE: + rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true); + break; + case CIFS_IOC_SET_INTEGRITY: + if (pSMBFile == NULL) + break; + tcon = tlink_tcon(pSMBFile->tlink); + if (tcon->ses->server->ops->set_integrity) + rc = tcon->ses->server->ops->set_integrity(xid, + tcon, pSMBFile); + else + rc = -EOPNOTSUPP; break; default: cifs_dbg(FYI, "unsupported ioctl\n"); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 54daee5ad4c1..df91bcf56d67 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -806,6 +806,53 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, cfile->fid.volatile_fid, cfile->pid, &eof, false); } +#ifdef CONFIG_CIFS_SMB311 +static int +smb2_duplicate_extents(const unsigned int xid, + struct cifsFileInfo *srcfile, + struct cifsFileInfo *trgtfile, u64 src_off, + u64 len, u64 dest_off) +{ + int rc; + unsigned int ret_data_len; + char *retbuf = NULL; + struct duplicate_extents_to_file dup_ext_buf; + struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink); + + /* server fileays advertise duplicate extent support with this flag */ + if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & + FILE_SUPPORTS_BLOCK_REFCOUNTING) == 0) + return -EOPNOTSUPP; + + dup_ext_buf.VolatileFileHandle = srcfile->fid.volatile_fid; + dup_ext_buf.PersistentFileHandle = srcfile->fid.persistent_fid; + dup_ext_buf.SourceFileOffset = cpu_to_le64(src_off); + dup_ext_buf.TargetFileOffset = cpu_to_le64(dest_off); + dup_ext_buf.ByteCount = cpu_to_le64(len); + cifs_dbg(FYI, "duplicate extents: src off %lld dst off %lld len %lld", + src_off, dest_off, len); + + rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); + if (rc) + goto duplicate_extents_out; + + rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, + trgtfile->fid.volatile_fid, + FSCTL_DUPLICATE_EXTENTS_TO_FILE, + true /* is_fsctl */, (char *)&dup_ext_buf, + sizeof(struct duplicate_extents_to_file), + (char **)&retbuf, + &ret_data_len); + + if (ret_data_len > 0) + cifs_dbg(FYI, "non-zero response length in duplicate extents"); + +duplicate_extents_out: + return rc; +} +#endif /* CONFIG_CIFS_SMB311 */ + + static int smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile) @@ -815,6 +862,28 @@ smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, } static int +smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, + struct cifsFileInfo *cfile) +{ + struct fsctl_set_integrity_information_req integr_info; + char *retbuf = NULL; + unsigned int ret_data_len; + + integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED); + integr_info.Flags = 0; + integr_info.Reserved = 0; + + return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + FSCTL_SET_INTEGRITY_INFORMATION, + true /* is_fsctl */, (char *)&integr_info, + sizeof(struct fsctl_set_integrity_information_req), + (char **)&retbuf, + &ret_data_len); + +} + +static int smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, const char *path, struct cifs_sb_info *cifs_sb, struct cifs_fid *fid, __u16 search_flags, @@ -1624,6 +1693,7 @@ struct smb_version_operations smb30_operations = { .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb3signingkey, .calc_signature = smb3_calc_signature, + .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, @@ -1635,6 +1705,94 @@ struct smb_version_operations smb30_operations = { .fallocate = smb3_fallocate, }; +#ifdef CONFIG_CIFS_SMB311 +struct smb_version_operations smb311_operations = { + .compare_fids = smb2_compare_fids, + .setup_request = smb2_setup_request, + .setup_async_request = smb2_setup_async_request, + .check_receive = smb2_check_receive, + .add_credits = smb2_add_credits, + .set_credits = smb2_set_credits, + .get_credits_field = smb2_get_credits_field, + .get_credits = smb2_get_credits, + .wait_mtu_credits = smb2_wait_mtu_credits, + .get_next_mid = smb2_get_next_mid, + .read_data_offset = smb2_read_data_offset, + .read_data_length = smb2_read_data_length, + .map_error = map_smb2_to_linux_error, + .find_mid = smb2_find_mid, + .check_message = smb2_check_message, + .dump_detail = smb2_dump_detail, + .clear_stats = smb2_clear_stats, + .print_stats = smb2_print_stats, + .dump_share_caps = smb2_dump_share_caps, + .is_oplock_break = smb2_is_valid_oplock_break, + .downgrade_oplock = smb2_downgrade_oplock, + .need_neg = smb2_need_neg, + .negotiate = smb2_negotiate, + .negotiate_wsize = smb2_negotiate_wsize, + .negotiate_rsize = smb2_negotiate_rsize, + .sess_setup = SMB2_sess_setup, + .logoff = SMB2_logoff, + .tree_connect = SMB2_tcon, + .tree_disconnect = SMB2_tdis, + .qfs_tcon = smb3_qfs_tcon, + .is_path_accessible = smb2_is_path_accessible, + .can_echo = smb2_can_echo, + .echo = SMB2_echo, + .query_path_info = smb2_query_path_info, + .get_srv_inum = smb2_get_srv_inum, + .query_file_info = smb2_query_file_info, + .set_path_size = smb2_set_path_size, + .set_file_size = smb2_set_file_size, + .set_file_info = smb2_set_file_info, + .set_compression = smb2_set_compression, + .mkdir = smb2_mkdir, + .mkdir_setinfo = smb2_mkdir_setinfo, + .rmdir = smb2_rmdir, + .unlink = smb2_unlink, + .rename = smb2_rename_path, + .create_hardlink = smb2_create_hardlink, + .query_symlink = smb2_query_symlink, + .query_mf_symlink = smb3_query_mf_symlink, + .create_mf_symlink = smb3_create_mf_symlink, + .open = smb2_open_file, + .set_fid = smb2_set_fid, + .close = smb2_close_file, + .flush = smb2_flush_file, + .async_readv = smb2_async_readv, + .async_writev = smb2_async_writev, + .sync_read = smb2_sync_read, + .sync_write = smb2_sync_write, + .query_dir_first = smb2_query_dir_first, + .query_dir_next = smb2_query_dir_next, + .close_dir = smb2_close_dir, + .calc_smb_size = smb2_calc_size, + .is_status_pending = smb2_is_status_pending, + .oplock_response = smb2_oplock_response, + .queryfs = smb2_queryfs, + .mand_lock = smb2_mand_lock, + .mand_unlock_range = smb2_unlock_range, + .push_mand_locks = smb2_push_mandatory_locks, + .get_lease_key = smb2_get_lease_key, + .set_lease_key = smb2_set_lease_key, + .new_lease_key = smb2_new_lease_key, + .generate_signingkey = generate_smb3signingkey, + .calc_signature = smb3_calc_signature, + .set_integrity = smb3_set_integrity, + .is_read_op = smb21_is_read_op, + .set_oplock_level = smb3_set_oplock_level, + .create_lease_buf = smb3_create_lease_buf, + .parse_lease_buf = smb3_parse_lease_buf, + .clone_range = smb2_clone_range, + .duplicate_extents = smb2_duplicate_extents, +/* .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */ + .wp_retry_size = smb2_wp_retry_size, + .dir_needs_close = smb2_dir_needs_close, + .fallocate = smb3_fallocate, +}; +#endif /* CIFS_SMB311 */ + struct smb_version_values smb20_values = { .version_string = SMB20_VERSION_STRING, .protocol_id = SMB20_PROT_ID, @@ -1714,3 +1872,25 @@ struct smb_version_values smb302_values = { .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, .create_lease_size = sizeof(struct create_lease_v2), }; + +#ifdef CONFIG_CIFS_SMB311 +struct smb_version_values smb311_values = { + .version_string = SMB311_VERSION_STRING, + .protocol_id = SMB311_PROT_ID, + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), +}; +#endif /* SMB311 */ diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 54cbe19d9c08..b8b4f08ee094 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -304,6 +304,59 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; } +#ifdef CONFIG_CIFS_SMB311 +/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */ +#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) - 4 */ + + +#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) +#define SMB2_ENCRYPTION_CAPABILITIES cpu_to_le16(2) + +static void +build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt) +{ + pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES; + pneg_ctxt->DataLength = cpu_to_le16(38); + pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1); + pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE); + get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE); + pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512; +} + +static void +build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt) +{ + pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES; + pneg_ctxt->DataLength = cpu_to_le16(6); + pneg_ctxt->CipherCount = cpu_to_le16(2); + pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM; + pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM; +} + +static void +assemble_neg_contexts(struct smb2_negotiate_req *req) +{ + + /* +4 is to account for the RFC1001 len field */ + char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4; + + build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt); + /* Add 2 to size to round to 8 byte boundary */ + pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context); + build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); + req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); + req->NegotiateContextCount = cpu_to_le16(2); + inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2 + + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ +} +#else +static void assemble_neg_contexts(struct smb2_negotiate_req *req) +{ + return; +} +#endif /* SMB311 */ + + /* * * SMB2 Worker functions follow: @@ -363,10 +416,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* ClientGUID must be zero for SMB2.02 dialect */ if (ses->server->vals->protocol_id == SMB20_PROT_ID) memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE); - else + else { memcpy(req->ClientGUID, server->client_guid, SMB2_CLIENT_GUID_SIZE); - + if (ses->server->vals->protocol_id == SMB311_PROT_ID) + assemble_neg_contexts(req); + } iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ iov[0].iov_len = get_rfc1002_length(req) + 4; @@ -393,8 +448,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB302_PROT_ID)) cifs_dbg(FYI, "negotiated smb3.02 dialect\n"); +#ifdef CONFIG_CIFS_SMB311 + else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) + cifs_dbg(FYI, "negotiated smb3.1.1 dialect\n"); +#endif /* SMB311 */ else { - cifs_dbg(VFS, "Illegal dialect returned by server %d\n", + cifs_dbg(VFS, "Illegal dialect returned by server 0x%x\n", le16_to_cpu(rsp->DialectRevision)); rc = -EIO; goto neg_exit; @@ -572,7 +631,7 @@ ssetup_ntlmssp_authenticate: return rc; req->hdr.SessionId = 0; /* First session, not a reauthenticate */ - req->VcNumber = 0; /* MBZ */ + req->Flags = 0; /* MBZ */ /* to enable echos and oplocks */ req->hdr.CreditRequest = cpu_to_le16(3); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 70867d54fb8b..451108284a2f 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -136,9 +136,6 @@ struct smb2_transform_hdr { __u64 SessionId; } __packed; -/* Encryption Algorithms */ -#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001) - /* * SMB2 flag definitions */ @@ -191,7 +188,10 @@ struct smb2_negotiate_req { __le16 Reserved; /* MBZ */ __le32 Capabilities; __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE]; - __le64 ClientStartTime; /* MBZ */ + /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */ + __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */ + __le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */ + __le16 Reserved2; __le16 Dialects[1]; /* One dialect (vers=) at a time for now */ } __packed; @@ -200,6 +200,7 @@ struct smb2_negotiate_req { #define SMB21_PROT_ID 0x0210 #define SMB30_PROT_ID 0x0300 #define SMB302_PROT_ID 0x0302 +#define SMB311_PROT_ID 0x0311 #define BAD_PROT_ID 0xFFFF /* SecurityMode flags */ @@ -217,12 +218,38 @@ struct smb2_negotiate_req { #define SMB2_NT_FIND 0x00100000 #define SMB2_LARGE_FILES 0x00200000 +#define SMB311_SALT_SIZE 32 +/* Hash Algorithm Types */ +#define SMB2_PREAUTH_INTEGRITY_SHA512 cpu_to_le16(0x0001) + +struct smb2_preauth_neg_context { + __le16 ContextType; /* 1 */ + __le16 DataLength; + __le32 Reserved; + __le16 HashAlgorithmCount; /* 1 */ + __le16 SaltLength; + __le16 HashAlgorithms; /* HashAlgorithms[0] since only one defined */ + __u8 Salt[SMB311_SALT_SIZE]; +} __packed; + +/* Encryption Algorithms Ciphers */ +#define SMB2_ENCRYPTION_AES128_CCM cpu_to_le16(0x0001) +#define SMB2_ENCRYPTION_AES128_GCM cpu_to_le16(0x0002) + +struct smb2_encryption_neg_context { + __le16 ContextType; /* 2 */ + __le16 DataLength; + __le32 Reserved; + __le16 CipherCount; /* AES-128-GCM and AES-128-CCM */ + __le16 Ciphers[2]; /* Ciphers[0] since only one used now */ +} __packed; + struct smb2_negotiate_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 65 */ __le16 SecurityMode; __le16 DialectRevision; - __le16 Reserved; /* MBZ */ + __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */ __u8 ServerGUID[16]; __le32 Capabilities; __le32 MaxTransactSize; @@ -232,14 +259,18 @@ struct smb2_negotiate_rsp { __le64 ServerStartTime; __le16 SecurityBufferOffset; __le16 SecurityBufferLength; - __le32 Reserved2; /* may be any value, ignore */ + __le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */ __u8 Buffer[1]; /* variable length GSS security buffer */ } __packed; +/* Flags */ +#define SMB2_SESSION_REQ_FLAG_BINDING 0x01 +#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04 + struct smb2_sess_setup_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 25 */ - __u8 VcNumber; + __u8 Flags; __u8 SecurityMode; __le32 Capabilities; __le32 Channel; @@ -274,10 +305,13 @@ struct smb2_logoff_rsp { __le16 Reserved; } __packed; +/* Flags/Reserved for SMB3.1.1 */ +#define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001 + struct smb2_tree_connect_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 9 */ - __le16 Reserved; + __le16 Reserved; /* Flags in SMB3.1.1 */ __le16 PathOffset; __le16 PathLength; __u8 Buffer[1]; /* variable length */ @@ -587,6 +621,29 @@ struct copychunk_ioctl_rsp { __le32 TotalBytesWritten; } __packed; +struct fsctl_set_integrity_information_req { + __le16 ChecksumAlgorithm; + __le16 Reserved; + __le32 Flags; +} __packed; + +struct fsctl_get_integrity_information_rsp { + __le16 ChecksumAlgorithm; + __le16 Reserved; + __le32 Flags; + __le32 ChecksumChunkSizeInBytes; + __le32 ClusterSizeInBytes; +} __packed; + +/* Integrity ChecksumAlgorithm choices for above */ +#define CHECKSUM_TYPE_NONE 0x0000 +#define CHECKSUM_TYPE_CRC64 0x0002 +#define CHECKSUM_TYPE_UNCHANGED 0xFFFF /* set only */ + +/* Integrity flags for above */ +#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 + + struct validate_negotiate_info_req { __le32 Capabilities; __u8 Guid[SMB2_CLIENT_GUID_SIZE]; @@ -620,6 +677,14 @@ struct compress_ioctl { __le16 CompressionState; /* See cifspdu.h for possible flag values */ } __packed; +struct duplicate_extents_to_file { + __u64 PersistentFileHandle; /* source file handle, opaque endianness */ + __u64 VolatileFileHandle; + __le64 SourceFileOffset; + __le64 TargetFileOffset; + __le64 ByteCount; /* Bytes to be copied */ +} __packed; + struct smb2_ioctl_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 57 */ diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 83efa59535be..a639d0dab453 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -75,10 +75,13 @@ #define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */ #define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */ #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ +#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */ #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ #define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */ +#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344 #define FSCTL_SIS_LINK_FILES 0x0009C104 +#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280 #define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */ #define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */ /* strange that the number for this op is not sequential with previous op */ @@ -309,14 +309,21 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, out: i_mmap_unlock_read(mapping); - if (bh->b_end_io) - bh->b_end_io(bh, 1); - return error; } -static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) +/** + * __dax_fault - handle a page fault on a DAX file + * @vma: The virtual memory area where the fault occurred + * @vmf: The description of the fault + * @get_block: The filesystem method used to translate file offsets to blocks + * + * When a page fault occurs, filesystems may call this helper in their + * fault handler for DAX files. __dax_fault() assumes the caller has done all + * the necessary locking for the page fault to proceed successfully. + */ +int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, + get_block_t get_block, dax_iodone_t complete_unwritten) { struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; @@ -417,7 +424,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, page_cache_release(page); } + /* + * If we successfully insert the new mapping over an unwritten extent, + * we need to ensure we convert the unwritten extent. If there is an + * error inserting the mapping, the filesystem needs to leave it as + * unwritten to prevent exposure of the stale underlying data to + * userspace, but we still need to call the completion function so + * the private resources on the mapping buffer can be released. We + * indicate what the callback should do via the uptodate variable, same + * as for normal BH based IO completions. + */ error = dax_insert_mapping(inode, &bh, vma, vmf); + if (buffer_unwritten(&bh)) + complete_unwritten(&bh, !error); out: if (error == -ENOMEM) @@ -434,6 +453,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } goto out; } +EXPORT_SYMBOL(__dax_fault); /** * dax_fault - handle a page fault on a DAX file @@ -445,7 +465,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, * fault handler for DAX files. */ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block) + get_block_t get_block, dax_iodone_t complete_unwritten) { int result; struct super_block *sb = file_inode(vma->vm_file)->i_sb; @@ -454,7 +474,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, sb_start_pagefault(sb); file_update_time(vma->vm_file); } - result = do_dax_fault(vma, vmf, get_block); + result = __dax_fault(vma, vmf, get_block, complete_unwritten); if (vmf->flags & FAULT_FLAG_WRITE) sb_end_pagefault(sb); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 3a0a6c6406d0..3b57c9f83c9b 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -28,12 +28,12 @@ #ifdef CONFIG_FS_DAX static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext2_get_block); + return dax_fault(vma, vmf, ext2_get_block, NULL); } static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext2_get_block); + return dax_mkwrite(vma, vmf, ext2_get_block, NULL); } static const struct vm_operations_struct ext2_dax_vm_ops = { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac517f15741c..bc313ac5d3fa 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -192,15 +192,27 @@ out: } #ifdef CONFIG_FS_DAX +static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) +{ + struct inode *inode = bh->b_assoc_map->host; + /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ + loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; + int err; + if (!uptodate) + return; + WARN_ON(!buffer_unwritten(bh)); + err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); +} + static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext4_get_block); + return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten); /* Is this the right get_block? */ } static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext4_get_block); + return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten); } static const struct vm_operations_struct ext4_dax_vm_ops = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f8a8d4ee7459..41f8e55afcd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -656,18 +656,6 @@ has_zeroout: return retval; } -static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) -{ - struct inode *inode = bh->b_assoc_map->host; - /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ - loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; - int err; - if (!uptodate) - return; - WARN_ON(!buffer_unwritten(bh)); - err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); -} - /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 @@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; - if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { + if (IS_DAX(inode) && buffer_unwritten(bh)) { + /* + * dgc: I suspect unwritten conversion on ext4+DAX is + * fundamentally broken here when there are concurrent + * read/write in progress on this inode. + */ + WARN_ON_ONCE(io_end); bh->b_assoc_map = inode->i_mapping; bh->b_private = (void *)(unsigned long)iblock; - bh->b_end_io = ext4_end_io_unwritten; } if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) set_buffer_defer_completion(bh); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f175b833b6ba..aa62004f1706 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2847,7 +2847,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp) *((unsigned int *)kp->arg) = num; return 0; } -static struct kernel_param_ops param_ops_portnr = { +static const struct kernel_param_ops param_ops_portnr = { .set = param_set_portnr, .get = param_get_uint, }; diff --git a/fs/seq_file.c b/fs/seq_file.c index 760e25dad985..1d9c1cbd4d0b 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -541,6 +541,7 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) return res; } +EXPORT_SYMBOL(seq_dentry); static void *single_start(struct seq_file *p, loff_t *pos) { diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 516162be1398..f9e9ffe6fb46 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -149,13 +149,27 @@ xfs_alloc_compute_aligned( { xfs_agblock_t bno; xfs_extlen_t len; + xfs_extlen_t diff; /* Trim busy sections out of found extent */ xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); + /* + * If we have a largish extent that happens to start before min_agbno, + * see if we can shift it into range... + */ + if (bno < args->min_agbno && bno + len > args->min_agbno) { + diff = args->min_agbno - bno; + if (len > diff) { + bno += diff; + len -= diff; + } + } + if (args->alignment > 1 && len >= args->minlen) { xfs_agblock_t aligned_bno = roundup(bno, args->alignment); - xfs_extlen_t diff = aligned_bno - bno; + + diff = aligned_bno - bno; *resbno = aligned_bno; *reslen = diff >= len ? 0 : len - diff; @@ -795,9 +809,13 @@ xfs_alloc_find_best_extent( * The good extent is closer than this one. */ if (!dir) { + if (*sbnoa > args->max_agbno) + goto out_use_good; if (*sbnoa >= args->agbno + gdiff) goto out_use_good; } else { + if (*sbnoa < args->min_agbno) + goto out_use_good; if (*sbnoa <= args->agbno - gdiff) goto out_use_good; } @@ -884,6 +902,17 @@ xfs_alloc_ag_vextent_near( dofirst = prandom_u32() & 1; #endif + /* handle unitialized agbno range so caller doesn't have to */ + if (!args->min_agbno && !args->max_agbno) + args->max_agbno = args->mp->m_sb.sb_agblocks - 1; + ASSERT(args->min_agbno <= args->max_agbno); + + /* clamp agbno to the range if it's outside */ + if (args->agbno < args->min_agbno) + args->agbno = args->min_agbno; + if (args->agbno > args->max_agbno) + args->agbno = args->max_agbno; + restart: bno_cur_lt = NULL; bno_cur_gt = NULL; @@ -976,6 +1005,8 @@ restart: <bnoa, <lena); if (ltlena < args->minlen) continue; + if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) + continue; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); ASSERT(args->len >= args->minlen); @@ -1096,11 +1127,11 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, ltbno, ltlen, <bnoa, <lena); - if (ltlena >= args->minlen) + if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) break; if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) goto error0; - if (!i) { + if (!i || ltbnoa < args->min_agbno) { xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); bno_cur_lt = NULL; @@ -1112,11 +1143,11 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); xfs_alloc_compute_aligned(args, gtbno, gtlen, >bnoa, >lena); - if (gtlena >= args->minlen) + if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) break; if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; - if (!i) { + if (!i || gtbnoa > args->max_agbno) { xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); bno_cur_gt = NULL; @@ -1216,6 +1247,7 @@ restart: ASSERT(ltnew >= ltbno); ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno); args->agbno = ltnew; if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, @@ -1825,11 +1857,11 @@ xfs_alloc_compute_maxlevels( xfs_extlen_t xfs_alloc_longest_free_extent( struct xfs_mount *mp, - struct xfs_perag *pag) + struct xfs_perag *pag, + xfs_extlen_t need) { - xfs_extlen_t need, delta = 0; + xfs_extlen_t delta = 0; - need = XFS_MIN_FREELIST_PAG(pag, mp); if (need > pag->pagf_flcount) delta = need - pag->pagf_flcount; @@ -1838,131 +1870,150 @@ xfs_alloc_longest_free_extent( return pag->pagf_flcount > 0 || pag->pagf_longest > 0; } +unsigned int +xfs_alloc_min_freelist( + struct xfs_mount *mp, + struct xfs_perag *pag) +{ + unsigned int min_free; + + /* space needed by-bno freespace btree */ + min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, + mp->m_ag_maxlevels); + /* space needed by-size freespace btree */ + min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, + mp->m_ag_maxlevels); + + return min_free; +} + +/* + * Check if the operation we are fixing up the freelist for should go ahead or + * not. If we are freeing blocks, we always allow it, otherwise the allocation + * is dependent on whether the size and shape of free space available will + * permit the requested allocation to take place. + */ +static bool +xfs_alloc_space_available( + struct xfs_alloc_arg *args, + xfs_extlen_t min_free, + int flags) +{ + struct xfs_perag *pag = args->pag; + xfs_extlen_t longest; + int available; + + if (flags & XFS_ALLOC_FLAG_FREEING) + return true; + + /* do we have enough contiguous free space for the allocation? */ + longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free); + if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) + return false; + + /* do have enough free space remaining for the allocation? */ + available = (int)(pag->pagf_freeblks + pag->pagf_flcount - + min_free - args->total); + if (available < (int)args->minleft) + return false; + + return true; +} + /* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. */ STATIC int /* error */ xfs_alloc_fix_freelist( - xfs_alloc_arg_t *args, /* allocation argument structure */ - int flags) /* XFS_ALLOC_FLAG_... */ + struct xfs_alloc_arg *args, /* allocation argument structure */ + int flags) /* XFS_ALLOC_FLAG_... */ { - xfs_buf_t *agbp; /* agf buffer pointer */ - xfs_agf_t *agf; /* a.g. freespace structure pointer */ - xfs_buf_t *agflbp;/* agfl buffer pointer */ - xfs_agblock_t bno; /* freelist block */ - xfs_extlen_t delta; /* new blocks needed in freelist */ - int error; /* error result code */ - xfs_extlen_t longest;/* longest extent in allocation group */ - xfs_mount_t *mp; /* file system mount point structure */ - xfs_extlen_t need; /* total blocks needed in freelist */ - xfs_perag_t *pag; /* per-ag information structure */ - xfs_alloc_arg_t targs; /* local allocation arguments */ - xfs_trans_t *tp; /* transaction pointer */ - - mp = args->mp; + struct xfs_mount *mp = args->mp; + struct xfs_perag *pag = args->pag; + struct xfs_trans *tp = args->tp; + struct xfs_buf *agbp = NULL; + struct xfs_buf *agflbp = NULL; + struct xfs_alloc_arg targs; /* local allocation arguments */ + xfs_agblock_t bno; /* freelist block */ + xfs_extlen_t need; /* total blocks needed in freelist */ + int error; - pag = args->pag; - tp = args->tp; if (!pag->pagf_init) { - if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, - &agbp))) - return error; + error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + if (error) + goto out_no_agbp; if (!pag->pagf_init) { ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - args->agbp = NULL; - return 0; + goto out_agbp_relse; } - } else - agbp = NULL; + } /* - * If this is a metadata preferred pag and we are user data - * then try somewhere else if we are not being asked to - * try harder at this point + * If this is a metadata preferred pag and we are user data then try + * somewhere else if we are not being asked to try harder at this + * point */ if (pag->pagf_metadata && args->userdata && (flags & XFS_ALLOC_FLAG_TRYLOCK)) { ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - args->agbp = NULL; - return 0; + goto out_agbp_relse; } - if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - /* - * If it looks like there isn't a long enough extent, or enough - * total blocks, reject it. - */ - need = XFS_MIN_FREELIST_PAG(pag, mp); - longest = xfs_alloc_longest_free_extent(mp, pag); - if ((args->minlen + args->alignment + args->minalignslop - 1) > - longest || - ((int)(pag->pagf_freeblks + pag->pagf_flcount - - need - args->total) < (int)args->minleft)) { - if (agbp) - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; - } - } + need = xfs_alloc_min_freelist(mp, pag); + if (!xfs_alloc_space_available(args, need, flags)) + goto out_agbp_relse; /* * Get the a.g. freespace buffer. * Can fail if we're not blocking on locks, and it's held. */ - if (agbp == NULL) { - if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, - &agbp))) - return error; - if (agbp == NULL) { + if (!agbp) { + error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + if (error) + goto out_no_agbp; + if (!agbp) { ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); - args->agbp = NULL; - return 0; - } - } - /* - * Figure out how many blocks we should have in the freelist. - */ - agf = XFS_BUF_TO_AGF(agbp); - need = XFS_MIN_FREELIST(agf, mp); - /* - * If there isn't enough total or single-extent, reject it. - */ - if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - delta = need > be32_to_cpu(agf->agf_flcount) ? - (need - be32_to_cpu(agf->agf_flcount)) : 0; - longest = be32_to_cpu(agf->agf_longest); - longest = (longest > delta) ? (longest - delta) : - (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); - if ((args->minlen + args->alignment + args->minalignslop - 1) > - longest || - ((int)(be32_to_cpu(agf->agf_freeblks) + - be32_to_cpu(agf->agf_flcount) - need - args->total) < - (int)args->minleft)) { - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; + goto out_no_agbp; } } + + /* If there isn't enough total space or single-extent, reject it. */ + need = xfs_alloc_min_freelist(mp, pag); + if (!xfs_alloc_space_available(args, need, flags)) + goto out_agbp_relse; + /* * Make the freelist shorter if it's too long. + * + * Note that from this point onwards, we will always release the agf and + * agfl buffers on error. This handles the case where we error out and + * the buffers are clean or may not have been joined to the transaction + * and hence need to be released manually. If they have been joined to + * the transaction, then xfs_trans_brelse() will handle them + * appropriately based on the recursion count and dirty state of the + * buffer. + * + * XXX (dgc): When we have lots of free space, does this buy us + * anything other than extra overhead when we need to put more blocks + * back on the free list? Maybe we should only do this when space is + * getting low or the AGFL is more than half full? */ - while (be32_to_cpu(agf->agf_flcount) > need) { - xfs_buf_t *bp; + while (pag->pagf_flcount > need) { + struct xfs_buf *bp; error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); if (error) - return error; - if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) - return error; + goto out_agbp_relse; + error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1); + if (error) + goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); xfs_trans_binval(tp, bp); } - /* - * Initialize the args structure. - */ + memset(&targs, 0, sizeof(targs)); targs.tp = tp; targs.mp = mp; @@ -1971,21 +2022,20 @@ xfs_alloc_fix_freelist( targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; targs.type = XFS_ALLOCTYPE_THIS_AG; targs.pag = pag; - if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) - return error; - /* - * Make the freelist longer if it's too short. - */ - while (be32_to_cpu(agf->agf_flcount) < need) { + error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); + if (error) + goto out_agbp_relse; + + /* Make the freelist longer if it's too short. */ + while (pag->pagf_flcount < need) { targs.agbno = 0; - targs.maxlen = need - be32_to_cpu(agf->agf_flcount); - /* - * Allocate as many blocks as possible at once. - */ - if ((error = xfs_alloc_ag_vextent(&targs))) { - xfs_trans_brelse(tp, agflbp); - return error; - } + targs.maxlen = need - pag->pagf_flcount; + + /* Allocate as many blocks as possible at once. */ + error = xfs_alloc_ag_vextent(&targs); + if (error) + goto out_agflbp_relse; + /* * Stop if we run out. Won't happen if callers are obeying * the restrictions correctly. Can happen for free calls @@ -1994,9 +2044,7 @@ xfs_alloc_fix_freelist( if (targs.agbno == NULLAGBLOCK) { if (flags & XFS_ALLOC_FLAG_FREEING) break; - xfs_trans_brelse(tp, agflbp); - args->agbp = NULL; - return 0; + goto out_agflbp_relse; } /* * Put each allocated block on the list. @@ -2005,12 +2053,21 @@ xfs_alloc_fix_freelist( error = xfs_alloc_put_freelist(tp, agbp, agflbp, bno, 0); if (error) - return error; + goto out_agflbp_relse; } } xfs_trans_brelse(tp, agflbp); args->agbp = agbp; return 0; + +out_agflbp_relse: + xfs_trans_brelse(tp, agflbp); +out_agbp_relse: + if (agbp) + xfs_trans_brelse(tp, agbp); +out_no_agbp: + args->agbp = NULL; + return error; } /* diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index d1b4b6a5c894..ca1c8168373a 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg { xfs_extlen_t total; /* total blocks needed in xaction */ xfs_extlen_t alignment; /* align answer to multiple of this */ xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */ + xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */ + xfs_agblock_t max_agbno; /* ... */ xfs_extlen_t len; /* output: actual size of extent */ xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */ xfs_alloctype_t otype; /* original allocation type */ @@ -128,11 +130,9 @@ typedef struct xfs_alloc_arg { #define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ #define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ -/* - * Find the length of the longest extent in an AG. - */ -xfs_extlen_t -xfs_alloc_longest_free_extent(struct xfs_mount *mp, +xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, + struct xfs_perag *pag, xfs_extlen_t need); +unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, struct xfs_perag *pag); /* diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 0a472fbe06d4..3349c9a1e845 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -266,7 +266,7 @@ xfs_attr_set( tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; error = xfs_trans_reserve(args.trans, &tres, args.total, 0); if (error) { - xfs_trans_cancel(args.trans, 0); + xfs_trans_cancel(args.trans); return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -276,7 +276,7 @@ xfs_attr_set( XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); - xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); + xfs_trans_cancel(args.trans); return error; } @@ -320,8 +320,7 @@ xfs_attr_set( xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); } - err2 = xfs_trans_commit(args.trans, - XFS_TRANS_RELEASE_LOG_RES); + err2 = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error ? error : err2; @@ -383,16 +382,14 @@ xfs_attr_set( * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: - if (args.trans) { - xfs_trans_cancel(args.trans, - XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - } + if (args.trans) + xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; } @@ -462,7 +459,7 @@ xfs_attr_remove( error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, XFS_ATTRRM_SPACE_RES(mp), 0); if (error) { - xfs_trans_cancel(args.trans, 0); + xfs_trans_cancel(args.trans); return error; } @@ -501,16 +498,14 @@ xfs_attr_remove( * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: - if (args.trans) { - xfs_trans_cancel(args.trans, - XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - } + if (args.trans) + xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f1026e86dabc..63e05b663380 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1112,7 +1112,6 @@ xfs_bmap_add_attrfork( int committed; /* xaction was committed */ int logflags; /* logging flags */ int error; /* error return value */ - int cancel_flags = 0; ASSERT(XFS_IFORK_Q(ip) == 0); @@ -1124,17 +1123,15 @@ xfs_bmap_add_attrfork( tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) goto trans_cancel; - cancel_flags |= XFS_TRANS_ABORT; if (XFS_IFORK_Q(ip)) goto trans_cancel; if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { @@ -1218,14 +1215,14 @@ xfs_bmap_add_attrfork( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) goto bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; bmap_cancel: xfs_bmap_cancel(&flist); trans_cancel: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -3521,7 +3518,8 @@ xfs_bmap_longest_free_extent( } } - longest = xfs_alloc_longest_free_extent(mp, pag); + longest = xfs_alloc_longest_free_extent(mp, pag, + xfs_alloc_min_freelist(mp, pag)); if (*blen < longest) *blen = longest; @@ -4424,7 +4422,15 @@ xfs_bmapi_convert_unwritten( error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, &bma->cur, mval, bma->firstblock, bma->flist, &tmp_logflags); - bma->logflags |= tmp_logflags; + /* + * Log the inode core unconditionally in the unwritten extent conversion + * path because the conversion might not have done so (e.g., if the + * extent count hasn't changed). We need to make sure the inode is dirty + * in the transaction for the sake of fsync(), even if nothing has + * changed, because fsync() will not force the log for this transaction + * unless it sees the inode pinned. + */ + bma->logflags |= tmp_logflags | XFS_ILOG_CORE; if (error) return error; @@ -5918,7 +5924,7 @@ xfs_bmap_split_extent( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -5936,10 +5942,9 @@ xfs_bmap_split_extent( if (error) goto out; - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - + return xfs_trans_commit(tp); out: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 4daaa662337b..a0ae572051de 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -170,7 +170,7 @@ typedef struct xfs_sb { __uint32_t sb_features_log_incompat; __uint32_t sb_crc; /* superblock crc */ - __uint32_t sb_pad; + xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */ xfs_ino_t sb_pquotino; /* project quota inode */ xfs_lsn_t sb_lsn; /* last write sequence */ @@ -256,7 +256,7 @@ typedef struct xfs_dsb { __be32 sb_features_log_incompat; __le32 sb_crc; /* superblock crc */ - __be32 sb_pad; + __be32 sb_spino_align; /* sparse inode chunk alignment */ __be64 sb_pquotino; /* project quota inode */ __be64 sb_lsn; /* last write sequence */ @@ -457,8 +457,10 @@ xfs_sb_has_ro_compat_feature( } #define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ +#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ #define XFS_SB_FEAT_INCOMPAT_ALL \ - (XFS_SB_FEAT_INCOMPAT_FTYPE) + (XFS_SB_FEAT_INCOMPAT_FTYPE| \ + XFS_SB_FEAT_INCOMPAT_SPINODES) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); } +static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES); +} + /* * end of superblock version macros */ @@ -758,19 +766,6 @@ typedef struct xfs_agfl { #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) - -#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) -#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ - (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) -#define XFS_MIN_FREELIST(a,mp) \ - (XFS_MIN_FREELIST_RAW( \ - be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \ - be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp)) -#define XFS_MIN_FREELIST_PAG(pag,mp) \ - (XFS_MIN_FREELIST_RAW( \ - (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \ - (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp)) - #define XFS_AGB_TO_FSB(mp,agno,agbno) \ (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) #define XFS_FSB_TO_AGNO(mp,fsbno) \ @@ -1216,26 +1211,54 @@ typedef __uint64_t xfs_inofree_t; #define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) #define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) +#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */ +#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t)) +#define XFS_INODES_PER_HOLEMASK_BIT \ + (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t))) + static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) { return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; } /* - * Data record structure + * The on-disk inode record structure has two formats. The original "full" + * format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount + * and replaces the 3 high-order freecount bytes wth the holemask and inode + * count. + * + * The holemask of the sparse record format allows an inode chunk to have holes + * that refer to blocks not owned by the inode record. This facilitates inode + * allocation in the event of severe free space fragmentation. */ typedef struct xfs_inobt_rec { __be32 ir_startino; /* starting inode number */ - __be32 ir_freecount; /* count of free inodes (set bits) */ + union { + struct { + __be32 ir_freecount; /* count of free inodes */ + } f; + struct { + __be16 ir_holemask;/* hole mask for sparse chunks */ + __u8 ir_count; /* total inode count */ + __u8 ir_freecount; /* count of free inodes */ + } sp; + } ir_u; __be64 ir_free; /* free inode mask */ } xfs_inobt_rec_t; typedef struct xfs_inobt_rec_incore { xfs_agino_t ir_startino; /* starting inode number */ - __int32_t ir_freecount; /* count of free inodes (set bits) */ + __uint16_t ir_holemask; /* hole mask for sparse chunks */ + __uint8_t ir_count; /* total inode count */ + __uint8_t ir_freecount; /* count of free inodes (set bits) */ xfs_inofree_t ir_free; /* free inode mask */ } xfs_inobt_rec_incore_t; +static inline bool xfs_inobt_issparse(uint16_t holemask) +{ + /* non-zero holemask represents a sparse rec. */ + return holemask; +} /* * Key structure @@ -1453,8 +1476,8 @@ struct xfs_acl { sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" -#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" +#define SGI_ACL_FILE "SGI_ACL_FILE" +#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 18dc721ca19f..89689c6a43e2 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ #define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ +#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */ /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 1c9e75521250..66efc702452a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -65,6 +65,8 @@ xfs_inobt_lookup( int *stat) /* success/failure */ { cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_holemask = 0; + cur->bc_rec.i.ir_count = 0; cur->bc_rec.i.ir_freecount = 0; cur->bc_rec.i.ir_free = 0; return xfs_btree_lookup(cur, dir, stat); @@ -82,7 +84,14 @@ xfs_inobt_update( union xfs_btree_rec rec; rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); - rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); + if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { + rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask); + rec.inobt.ir_u.sp.ir_count = irec->ir_count; + rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount; + } else { + /* ir_holemask/ir_count not supported on-disk */ + rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount); + } rec.inobt.ir_free = cpu_to_be64(irec->ir_free); return xfs_btree_update(cur, &rec); } @@ -100,12 +109,27 @@ xfs_inobt_get_rec( int error; error = xfs_btree_get_rec(cur, &rec, stat); - if (!error && *stat == 1) { - irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); - irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); - irec->ir_free = be64_to_cpu(rec->inobt.ir_free); + if (error || *stat == 0) + return error; + + irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); + if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { + irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask); + irec->ir_count = rec->inobt.ir_u.sp.ir_count; + irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount; + } else { + /* + * ir_holemask/ir_count not supported on-disk. Fill in hardcoded + * values for full inode chunks. + */ + irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL; + irec->ir_count = XFS_INODES_PER_CHUNK; + irec->ir_freecount = + be32_to_cpu(rec->inobt.ir_u.f.ir_freecount); } - return error; + irec->ir_free = be64_to_cpu(rec->inobt.ir_free); + + return 0; } /* @@ -114,10 +138,14 @@ xfs_inobt_get_rec( STATIC int xfs_inobt_insert_rec( struct xfs_btree_cur *cur, + __uint16_t holemask, + __uint8_t count, __int32_t freecount, xfs_inofree_t free, int *stat) { + cur->bc_rec.i.ir_holemask = holemask; + cur->bc_rec.i.ir_count = count; cur->bc_rec.i.ir_freecount = freecount; cur->bc_rec.i.ir_free = free; return xfs_btree_insert(cur, stat); @@ -154,7 +182,9 @@ xfs_inobt_insert( } ASSERT(i == 0); - error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, + error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL, + XFS_INODES_PER_CHUNK, + XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); @@ -220,6 +250,7 @@ xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, struct list_head *buffer_list, + int icount, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, @@ -275,7 +306,7 @@ xfs_ialloc_inode_init( * they track in the AIL as if they were physically logged. */ if (tp) - xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, + xfs_icreate_log(tp, agno, agbno, icount, mp->m_sb.sb_inodesize, length, gen); } else version = 2; @@ -347,6 +378,214 @@ xfs_ialloc_inode_init( } /* + * Align startino and allocmask for a recently allocated sparse chunk such that + * they are fit for insertion (or merge) into the on-disk inode btrees. + * + * Background: + * + * When enabled, sparse inode support increases the inode alignment from cluster + * size to inode chunk size. This means that the minimum range between two + * non-adjacent inode records in the inobt is large enough for a full inode + * record. This allows for cluster sized, cluster aligned block allocation + * without need to worry about whether the resulting inode record overlaps with + * another record in the tree. Without this basic rule, we would have to deal + * with the consequences of overlap by potentially undoing recent allocations in + * the inode allocation codepath. + * + * Because of this alignment rule (which is enforced on mount), there are two + * inobt possibilities for newly allocated sparse chunks. One is that the + * aligned inode record for the chunk covers a range of inodes not already + * covered in the inobt (i.e., it is safe to insert a new sparse record). The + * other is that a record already exists at the aligned startino that considers + * the newly allocated range as sparse. In the latter case, record content is + * merged in hope that sparse inode chunks fill to full chunks over time. + */ +STATIC void +xfs_align_sparse_ino( + struct xfs_mount *mp, + xfs_agino_t *startino, + uint16_t *allocmask) +{ + xfs_agblock_t agbno; + xfs_agblock_t mod; + int offset; + + agbno = XFS_AGINO_TO_AGBNO(mp, *startino); + mod = agbno % mp->m_sb.sb_inoalignmt; + if (!mod) + return; + + /* calculate the inode offset and align startino */ + offset = mod << mp->m_sb.sb_inopblog; + *startino -= offset; + + /* + * Since startino has been aligned down, left shift allocmask such that + * it continues to represent the same physical inodes relative to the + * new startino. + */ + *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT; +} + +/* + * Determine whether the source inode record can merge into the target. Both + * records must be sparse, the inode ranges must match and there must be no + * allocation overlap between the records. + */ +STATIC bool +__xfs_inobt_can_merge( + struct xfs_inobt_rec_incore *trec, /* tgt record */ + struct xfs_inobt_rec_incore *srec) /* src record */ +{ + uint64_t talloc; + uint64_t salloc; + + /* records must cover the same inode range */ + if (trec->ir_startino != srec->ir_startino) + return false; + + /* both records must be sparse */ + if (!xfs_inobt_issparse(trec->ir_holemask) || + !xfs_inobt_issparse(srec->ir_holemask)) + return false; + + /* both records must track some inodes */ + if (!trec->ir_count || !srec->ir_count) + return false; + + /* can't exceed capacity of a full record */ + if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK) + return false; + + /* verify there is no allocation overlap */ + talloc = xfs_inobt_irec_to_allocmask(trec); + salloc = xfs_inobt_irec_to_allocmask(srec); + if (talloc & salloc) + return false; + + return true; +} + +/* + * Merge the source inode record into the target. The caller must call + * __xfs_inobt_can_merge() to ensure the merge is valid. + */ +STATIC void +__xfs_inobt_rec_merge( + struct xfs_inobt_rec_incore *trec, /* target */ + struct xfs_inobt_rec_incore *srec) /* src */ +{ + ASSERT(trec->ir_startino == srec->ir_startino); + + /* combine the counts */ + trec->ir_count += srec->ir_count; + trec->ir_freecount += srec->ir_freecount; + + /* + * Merge the holemask and free mask. For both fields, 0 bits refer to + * allocated inodes. We combine the allocated ranges with bitwise AND. + */ + trec->ir_holemask &= srec->ir_holemask; + trec->ir_free &= srec->ir_free; +} + +/* + * Insert a new sparse inode chunk into the associated inode btree. The inode + * record for the sparse chunk is pre-aligned to a startino that should match + * any pre-existing sparse inode record in the tree. This allows sparse chunks + * to fill over time. + * + * This function supports two modes of handling preexisting records depending on + * the merge flag. If merge is true, the provided record is merged with the + * existing record and updated in place. The merged record is returned in nrec. + * If merge is false, an existing record is replaced with the provided record. + * If no preexisting record exists, the provided record is always inserted. + * + * It is considered corruption if a merge is requested and not possible. Given + * the sparse inode alignment constraints, this should never happen. + */ +STATIC int +xfs_inobt_insert_sprec( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + int btnum, + struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */ + bool merge) /* merge or replace */ +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); + int error; + int i; + struct xfs_inobt_rec_incore rec; + + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + + /* the new record is pre-aligned so we know where to look */ + error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + /* if nothing there, insert a new record and return */ + if (i == 0) { + error = xfs_inobt_insert_rec(cur, nrec->ir_holemask, + nrec->ir_count, nrec->ir_freecount, + nrec->ir_free, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); + + goto out; + } + + /* + * A record exists at this startino. Merge or replace the record + * depending on what we've been asked to do. + */ + if (merge) { + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); + XFS_WANT_CORRUPTED_GOTO(mp, + rec.ir_startino == nrec->ir_startino, + error); + + /* + * This should never fail. If we have coexisting records that + * cannot merge, something is seriously wrong. + */ + XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec), + error); + + trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino, + rec.ir_holemask, nrec->ir_startino, + nrec->ir_holemask); + + /* merge to nrec to output the updated record */ + __xfs_inobt_rec_merge(nrec, &rec); + + trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino, + nrec->ir_holemask); + + error = xfs_inobt_rec_check_count(mp, nrec); + if (error) + goto error; + } + + error = xfs_inobt_update(cur, nrec); + if (error) + goto error; + +out: + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + +/* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ @@ -364,11 +603,22 @@ xfs_ialloc_ag_alloc( xfs_agino_t newlen; /* new number of inodes */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ + uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */ + struct xfs_inobt_rec_incore rec; struct xfs_perag *pag; + int do_sparse = 0; memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = tp->t_mountp; + args.fsbno = NULLFSBLOCK; + +#ifdef DEBUG + /* randomly do sparse inode allocations */ + if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && + args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks) + do_sparse = prandom_u32() & 1; +#endif /* * Locking will ensure that we don't have two callers in here @@ -390,6 +640,8 @@ xfs_ialloc_ag_alloc( agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + args.mp->m_ialloc_blks; + if (do_sparse) + goto sparse_alloc; if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); @@ -428,8 +680,7 @@ xfs_ialloc_ag_alloc( * subsequent requests. */ args.minalignslop = 0; - } else - args.fsbno = NULLFSBLOCK; + } if (unlikely(args.fsbno == NULLFSBLOCK)) { /* @@ -480,6 +731,47 @@ xfs_ialloc_ag_alloc( return error; } + /* + * Finally, try a sparse allocation if the filesystem supports it and + * the sparse allocation length is smaller than a full chunk. + */ + if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && + args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks && + args.fsbno == NULLFSBLOCK) { +sparse_alloc: + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.agbno = be32_to_cpu(agi->agi_root); + args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); + args.alignment = args.mp->m_sb.sb_spino_align; + args.prod = 1; + + args.minlen = args.mp->m_ialloc_min_blks; + args.maxlen = args.minlen; + + /* + * The inode record will be aligned to full chunk size. We must + * prevent sparse allocation from AG boundaries that result in + * invalid inode records, such as records that start at agbno 0 + * or extend beyond the AG. + * + * Set min agbno to the first aligned, non-zero agbno and max to + * the last aligned agbno that is at least one full chunk from + * the end of the AG. + */ + args.min_agbno = args.mp->m_sb.sb_inoalignmt; + args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, + args.mp->m_sb.sb_inoalignmt) - + args.mp->m_ialloc_blks; + + error = xfs_alloc_vextent(&args); + if (error) + return error; + + newlen = args.len << args.mp->m_sb.sb_inopblog; + ASSERT(newlen <= XFS_INODES_PER_CHUNK); + allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1; + } + if (args.fsbno == NULLFSBLOCK) { *alloc = 0; return 0; @@ -495,8 +787,8 @@ xfs_ialloc_ag_alloc( * rather than a linear progression to prevent the next generation * number from being easily guessable. */ - error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, - args.len, prandom_u32()); + error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno, + args.agbno, args.len, prandom_u32()); if (error) return error; @@ -504,6 +796,73 @@ xfs_ialloc_ag_alloc( * Convert the results. */ newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); + + if (xfs_inobt_issparse(~allocmask)) { + /* + * We've allocated a sparse chunk. Align the startino and mask. + */ + xfs_align_sparse_ino(args.mp, &newino, &allocmask); + + rec.ir_startino = newino; + rec.ir_holemask = ~allocmask; + rec.ir_count = newlen; + rec.ir_freecount = newlen; + rec.ir_free = XFS_INOBT_ALL_FREE; + + /* + * Insert the sparse record into the inobt and allow for a merge + * if necessary. If a merge does occur, rec is updated to the + * merged record. + */ + error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO, + &rec, true); + if (error == -EFSCORRUPTED) { + xfs_alert(args.mp, + "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u", + XFS_AGINO_TO_INO(args.mp, agno, + rec.ir_startino), + rec.ir_holemask, rec.ir_count); + xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE); + } + if (error) + return error; + + /* + * We can't merge the part we've just allocated as for the inobt + * due to finobt semantics. The original record may or may not + * exist independent of whether physical inodes exist in this + * sparse chunk. + * + * We must update the finobt record based on the inobt record. + * rec contains the fully merged and up to date inobt record + * from the previous call. Set merge false to replace any + * existing record with this one. + */ + if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + error = xfs_inobt_insert_sprec(args.mp, tp, agbp, + XFS_BTNUM_FINO, &rec, + false); + if (error) + return error; + } + } else { + /* full chunk - insert new records to both btrees */ + error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, + XFS_BTNUM_INO); + if (error) + return error; + + if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { + error = xfs_inobt_insert(args.mp, tp, agbp, newino, + newlen, XFS_BTNUM_FINO); + if (error) + return error; + } + } + + /* + * Update AGI counts and newino. + */ be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); pag = xfs_perag_get(args.mp, agno); @@ -512,20 +871,6 @@ xfs_ialloc_ag_alloc( agi->agi_newino = cpu_to_be32(newino); /* - * Insert records describing the new inode chunk into the btrees. - */ - error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, - XFS_BTNUM_INO); - if (error) - return error; - - if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { - error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, - XFS_BTNUM_FINO); - if (error) - return error; - } - /* * Log allocation group header fields */ xfs_ialloc_log_agi(tp, agbp, @@ -645,7 +990,7 @@ xfs_ialloc_ag_select( * if we fail allocation due to alignment issues then it is most * likely a real ENOSPC condition. */ - ineed = mp->m_ialloc_blks; + ineed = mp->m_ialloc_min_blks; if (flags && ineed > 1) ineed += xfs_ialloc_cluster_alignment(mp); longest = pag->pagf_longest; @@ -732,6 +1077,27 @@ xfs_ialloc_get_rec( } /* + * Return the offset of the first free inode in the record. If the inode chunk + * is sparsely allocated, we convert the record holemask to inode granularity + * and mask off the unallocated regions from the inode free mask. + */ +STATIC int +xfs_inobt_first_free_inode( + struct xfs_inobt_rec_incore *rec) +{ + xfs_inofree_t realfree; + + /* if there are no holes, return the first available offset */ + if (!xfs_inobt_issparse(rec->ir_holemask)) + return xfs_lowbit64(rec->ir_free); + + realfree = xfs_inobt_irec_to_allocmask(rec); + realfree &= rec->ir_free; + + return xfs_lowbit64(realfree); +} + +/* * Allocate an inode using the inobt-only algorithm. */ STATIC int @@ -961,7 +1327,7 @@ newino: } alloc_inode: - offset = xfs_lowbit64(rec.ir_free); + offset = xfs_inobt_first_free_inode(&rec); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % @@ -1210,7 +1576,7 @@ xfs_dialloc_ag( if (error) goto error_cur; - offset = xfs_lowbit64(rec.ir_free); + offset = xfs_inobt_first_free_inode(&rec); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % @@ -1439,6 +1805,83 @@ out_error: return error; } +/* + * Free the blocks of an inode chunk. We must consider that the inode chunk + * might be sparse and only free the regions that are allocated as part of the + * chunk. + */ +STATIC void +xfs_difree_inode_chunk( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *rec, + struct xfs_bmap_free *flist) +{ + xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino); + int startidx, endidx; + int nextbit; + xfs_agblock_t agbno; + int contigblk; + DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS); + + if (!xfs_inobt_issparse(rec->ir_holemask)) { + /* not sparse, calculate extent info directly */ + xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, + XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)), + mp->m_ialloc_blks, flist, mp); + return; + } + + /* holemask is only 16-bits (fits in an unsigned long) */ + ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0])); + holemask[0] = rec->ir_holemask; + + /* + * Find contiguous ranges of zeroes (i.e., allocated regions) in the + * holemask and convert the start/end index of each range to an extent. + * We start with the start and end index both pointing at the first 0 in + * the mask. + */ + startidx = endidx = find_first_zero_bit(holemask, + XFS_INOBT_HOLEMASK_BITS); + nextbit = startidx + 1; + while (startidx < XFS_INOBT_HOLEMASK_BITS) { + nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS, + nextbit); + /* + * If the next zero bit is contiguous, update the end index of + * the current range and continue. + */ + if (nextbit != XFS_INOBT_HOLEMASK_BITS && + nextbit == endidx + 1) { + endidx = nextbit; + goto next; + } + + /* + * nextbit is not contiguous with the current end index. Convert + * the current start/end to an extent and add it to the free + * list. + */ + agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) / + mp->m_sb.sb_inopblock; + contigblk = ((endidx - startidx + 1) * + XFS_INODES_PER_HOLEMASK_BIT) / + mp->m_sb.sb_inopblock; + + ASSERT(agbno % mp->m_sb.sb_spino_align == 0); + ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); + xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, + flist, mp); + + /* reset range to current bit and carry on... */ + startidx = endidx = nextbit; + +next: + nextbit++; + } +} + STATIC int xfs_difree_inobt( struct xfs_mount *mp, @@ -1446,8 +1889,7 @@ xfs_difree_inobt( struct xfs_buf *agbp, xfs_agino_t agino, struct xfs_bmap_free *flist, - int *deleted, - xfs_ino_t *first_ino, + struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); @@ -1501,20 +1943,23 @@ xfs_difree_inobt( rec.ir_freecount++; /* - * When an inode cluster is free, it becomes eligible for removal + * When an inode chunk is free, it becomes eligible for removal. Don't + * remove the chunk if the block size is large enough for multiple inode + * chunks (that might not be free). */ if (!(mp->m_flags & XFS_MOUNT_IKEEP) && - (rec.ir_freecount == mp->m_ialloc_inos)) { - - *deleted = 1; - *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); + rec.ir_free == XFS_INOBT_ALL_FREE && + mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { + xic->deleted = 1; + xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); + xic->alloc = xfs_inobt_irec_to_allocmask(&rec); /* * Remove the inode cluster from the AGI B+Tree, adjust the * AGI and Superblock inode counts, and mark the disk space * to be freed when the transaction is committed. */ - ilen = mp->m_ialloc_inos; + ilen = rec.ir_freecount; be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); @@ -1530,11 +1975,9 @@ xfs_difree_inobt( goto error0; } - xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, - XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), - mp->m_ialloc_blks, flist, mp); + xfs_difree_inode_chunk(mp, agno, &rec, flist); } else { - *deleted = 0; + xic->deleted = 0; error = xfs_inobt_update(cur, &rec); if (error) { @@ -1599,7 +2042,9 @@ xfs_difree_finobt( */ XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error); - error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, + error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask, + ibtrec->ir_count, + ibtrec->ir_freecount, ibtrec->ir_free, &i); if (error) goto error; @@ -1634,8 +2079,13 @@ xfs_difree_finobt( * free inode. Hence, if all of the inodes are free and we aren't * keeping inode chunks permanently on disk, remove the record. * Otherwise, update the record with the new information. + * + * Note that we currently can't free chunks when the block size is large + * enough for multiple chunks. Leave the finobt record to remain in sync + * with the inobt. */ - if (rec.ir_freecount == mp->m_ialloc_inos && + if (rec.ir_free == XFS_INOBT_ALL_FREE && + mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK && !(mp->m_flags & XFS_MOUNT_IKEEP)) { error = xfs_btree_delete(cur, &i); if (error) @@ -1671,8 +2121,7 @@ xfs_difree( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t inode, /* inode to be freed */ struct xfs_bmap_free *flist, /* extents to free */ - int *deleted,/* set if inode cluster was deleted */ - xfs_ino_t *first_ino)/* first inode in deleted cluster */ + struct xfs_icluster *xic) /* cluster info if deleted */ { /* REFERENCED */ xfs_agblock_t agbno; /* block number containing inode */ @@ -1723,8 +2172,7 @@ xfs_difree( /* * Fix up the inode allocation btree. */ - error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, - &rec); + error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 100007d56449..6e450df2979b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -28,6 +28,13 @@ struct xfs_btree_cur; /* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 +struct xfs_icluster { + bool deleted; /* record is deleted */ + xfs_ino_t first_ino; /* first inode number */ + uint64_t alloc; /* inode phys. allocation bitmap for + * sparse chunks */ +}; + /* Calculate and return the number of filesystem blocks per inode cluster */ static inline int xfs_icluster_size_fsb( @@ -44,8 +51,7 @@ xfs_icluster_size_fsb( static inline struct xfs_dinode * xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) { - return (struct xfs_dinode *) - (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog)); + return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog); } /* @@ -90,8 +96,7 @@ xfs_difree( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t inode, /* inode to be freed */ struct xfs_bmap_free *flist, /* extents to free */ - int *deleted, /* set if inode cluster was deleted */ - xfs_ino_t *first_ino); /* first inode in deleted cluster */ + struct xfs_icluster *ifree); /* cluster info if deleted */ /* * Return the location of the inode in imap, for mapping it into a buffer. @@ -156,7 +161,7 @@ int xfs_inobt_get_rec(struct xfs_btree_cur *cur, * Inode chunk initialisation routine */ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, - struct list_head *buffer_list, + struct list_head *buffer_list, int icount, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, unsigned int gen); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 964c465ca69c..674ad8f760be 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -167,7 +167,16 @@ xfs_inobt_init_rec_from_cur( union xfs_btree_rec *rec) { rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); - rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); + if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { + rec->inobt.ir_u.sp.ir_holemask = + cpu_to_be16(cur->bc_rec.i.ir_holemask); + rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count; + rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount; + } else { + /* ir_holemask/ir_count not supported on-disk */ + rec->inobt.ir_u.f.ir_freecount = + cpu_to_be32(cur->bc_rec.i.ir_freecount); + } rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); } @@ -418,3 +427,85 @@ xfs_inobt_maxrecs( return blocklen / sizeof(xfs_inobt_rec_t); return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); } + +/* + * Convert the inode record holemask to an inode allocation bitmap. The inode + * allocation bitmap is inode granularity and specifies whether an inode is + * physically allocated on disk (not whether the inode is considered allocated + * or free by the fs). + * + * A bit value of 1 means the inode is allocated, a value of 0 means it is free. + */ +uint64_t +xfs_inobt_irec_to_allocmask( + struct xfs_inobt_rec_incore *rec) +{ + uint64_t bitmap = 0; + uint64_t inodespbit; + int nextbit; + uint allocbitmap; + + /* + * The holemask has 16-bits for a 64 inode record. Therefore each + * holemask bit represents multiple inodes. Create a mask of bits to set + * in the allocmask for each holemask bit. + */ + inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1; + + /* + * Allocated inodes are represented by 0 bits in holemask. Invert the 0 + * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask + * anything beyond the 16 holemask bits since this casts to a larger + * type. + */ + allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1); + + /* + * allocbitmap is the inverted holemask so every set bit represents + * allocated inodes. To expand from 16-bit holemask granularity to + * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target + * bitmap for every holemask bit. + */ + nextbit = xfs_next_bit(&allocbitmap, 1, 0); + while (nextbit != -1) { + ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY)); + + bitmap |= (inodespbit << + (nextbit * XFS_INODES_PER_HOLEMASK_BIT)); + + nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1); + } + + return bitmap; +} + +#if defined(DEBUG) || defined(XFS_WARN) +/* + * Verify that an in-core inode record has a valid inode count. + */ +int +xfs_inobt_rec_check_count( + struct xfs_mount *mp, + struct xfs_inobt_rec_incore *rec) +{ + int inocount = 0; + int nextbit = 0; + uint64_t allocbmap; + int wordsz; + + wordsz = sizeof(allocbmap) / sizeof(unsigned int); + allocbmap = xfs_inobt_irec_to_allocmask(rec); + + nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit); + while (nextbit != -1) { + inocount++; + nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, + nextbit + 1); + } + + if (inocount != rec->ir_count) + return -EFSCORRUPTED; + + return 0; +} +#endif /* DEBUG */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index d7ebea72c2d0..bd88453217ce 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -62,4 +62,14 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, xfs_btnum_t); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); +/* ir_holemask to inode allocation bitmap conversion */ +uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *); + +#if defined(DEBUG) || defined(XFS_WARN) +int xfs_inobt_rec_check_count(struct xfs_mount *, + struct xfs_inobt_rec_incore *); +#else +#define xfs_inobt_rec_check_count(mp, rec) 0 +#endif /* DEBUG */ + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 002b6b3a1988..6526e7696184 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -46,8 +46,7 @@ xfs_inobp_check( j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; for (i = 0; i < j; i++) { - dip = (xfs_dinode_t *)xfs_buf_offset(bp, - i * mp->m_sb.sb_inodesize); + dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); if (!dip->di_next_unlinked) { xfs_alert(mp, "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", @@ -86,8 +85,7 @@ xfs_inode_buf_verify( int di_ok; xfs_dinode_t *dip; - dip = (struct xfs_dinode *)xfs_buf_offset(bp, - (i << mp->m_sb.sb_inodelog)); + dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, @@ -186,7 +184,7 @@ xfs_imap_to_bp( } *bpp = bp; - *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); + *dipp = xfs_buf_offset(bp, imap->im_boffset); return 0; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index dc4bfc5d88fc..df9851c46b5c 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -174,6 +174,27 @@ xfs_mount_validate_sb( return -EFSCORRUPTED; } + /* + * Full inode chunks must be aligned to inode chunk size when + * sparse inodes are enabled to support the sparse chunk + * allocation algorithm and prevent overlapping inode records. + */ + if (xfs_sb_version_hassparseinodes(sbp)) { + uint32_t align; + + xfs_alert(mp, + "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!"); + + align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize + >> sbp->sb_blocklog; + if (sbp->sb_inoalignmt != align) { + xfs_warn(mp, +"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", + sbp->sb_inoalignmt, align); + return -EINVAL; + } + } + if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { xfs_warn(mp, @@ -374,7 +395,7 @@ __xfs_sb_from_disk( be32_to_cpu(from->sb_features_log_incompat); /* crc is only used on disk, not in memory; just init to 0 here. */ to->sb_crc = 0; - to->sb_pad = 0; + to->sb_spino_align = be32_to_cpu(from->sb_spino_align); to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); /* Convert on-disk flags to in-memory flags? */ @@ -516,7 +537,7 @@ xfs_sb_to_disk( cpu_to_be32(from->sb_features_incompat); to->sb_features_log_incompat = cpu_to_be32(from->sb_features_log_incompat); - to->sb_pad = 0; + to->sb_spino_align = cpu_to_be32(from->sb_spino_align); to->sb_lsn = cpu_to_be64(from->sb_lsn); } } @@ -689,6 +710,11 @@ xfs_sb_mount_common( mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, sbp->sb_inopblock); mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; + + if (sbp->sb_spino_align) + mp->m_ialloc_min_blks = sbp->sb_spino_align; + else + mp->m_ialloc_min_blks = mp->m_ialloc_blks; } /* @@ -792,12 +818,12 @@ xfs_sync_sb( tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } xfs_log_sb(tp); if (wait) xfs_trans_set_sync(tp); - return xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp); } diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 8dda4b321343..5be529707903 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -182,12 +182,6 @@ int xfs_log_calc_minimum_size(struct xfs_mount *); #define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer count in superblock */ /* - * Values for call flags parameter. - */ -#define XFS_TRANS_RELEASE_LOG_RES 0x4 -#define XFS_TRANS_ABORT 0x8 - -/* * Field values for xfs_trans_mod_sb. */ #define XFS_TRANS_SB_ICOUNT 0x00000001 diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index 2d5bdfce6d8f..797815012c0e 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -73,9 +73,9 @@ struct xfs_trans_resv { * 2 trees * (2 blocks/level * max depth - 1) * block size */ #define XFS_ALLOCFREE_LOG_RES(mp,nx) \ - ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) + ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1))) #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ - ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) + ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1))) /* * Per-directory log reservation for any directory change. diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index bf9c4579334d..41e0428d8175 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -67,7 +67,7 @@ #define XFS_DIOSTRAT_SPACE_RES(mp, v) \ (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) #define XFS_GROWFS_SPACE_RES(mp) \ - (2 * XFS_AG_MAXLEVELS(mp)) + (2 * (mp)->m_ag_maxlevels) #define XFS_GROWFSRT_SPACE_RES(mp,b) \ ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) #define XFS_LINK_SPACE_RES(mp,nl) \ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e5099f268032..3859f5e27a4d 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -109,7 +109,7 @@ xfs_setfilesize_trans_alloc( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -145,7 +145,7 @@ xfs_setfilesize( isize = xfs_new_eof(ip, offset + size); if (!isize) { xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return 0; } @@ -155,7 +155,7 @@ xfs_setfilesize( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - return xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp); } STATIC int @@ -1348,7 +1348,7 @@ __xfs_get_blocks( sector_t iblock, struct buffer_head *bh_result, int create, - int direct) + bool direct) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1413,6 +1413,7 @@ __xfs_get_blocks( if (error) return error; new = 1; + } else { /* * Delalloc reservations do not require a transaction, @@ -1507,49 +1508,29 @@ xfs_get_blocks( struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, 0); + return __xfs_get_blocks(inode, iblock, bh_result, create, false); } -STATIC int +int xfs_get_blocks_direct( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - return __xfs_get_blocks(inode, iblock, bh_result, create, 1); + return __xfs_get_blocks(inode, iblock, bh_result, create, true); } -/* - * Complete a direct I/O write request. - * - * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. - * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite - * wholly within the EOF and so there is nothing for us to do. Note that in this - * case the completion can be called in interrupt context, whereas if we have an - * ioend we will always be called in task context (i.e. from a workqueue). - */ -STATIC void -xfs_end_io_direct_write( - struct kiocb *iocb, +static void +__xfs_end_io_direct_write( + struct inode *inode, + struct xfs_ioend *ioend, loff_t offset, - ssize_t size, - void *private) + ssize_t size) { - struct inode *inode = file_inode(iocb->ki_filp); - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - struct xfs_ioend *ioend = private; - - trace_xfs_gbmap_direct_endio(ip, offset, size, - ioend ? ioend->io_type : 0, NULL); + struct xfs_mount *mp = XFS_I(inode)->i_mount; - if (!ioend) { - ASSERT(offset + size <= i_size_read(inode)); - return; - } - - if (XFS_FORCED_SHUTDOWN(mp)) + if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error) goto out_end_io; /* @@ -1586,10 +1567,10 @@ xfs_end_io_direct_write( * here can result in EOF moving backwards and Bad Things Happen when * that occurs. */ - spin_lock(&ip->i_flags_lock); + spin_lock(&XFS_I(inode)->i_flags_lock); if (offset + size > i_size_read(inode)) i_size_write(inode, offset + size); - spin_unlock(&ip->i_flags_lock); + spin_unlock(&XFS_I(inode)->i_flags_lock); /* * If we are doing an append IO that needs to update the EOF on disk, @@ -1606,6 +1587,98 @@ out_end_io: return; } +/* + * Complete a direct I/O write request. + * + * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. + * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite + * wholly within the EOF and so there is nothing for us to do. Note that in this + * case the completion can be called in interrupt context, whereas if we have an + * ioend we will always be called in task context (i.e. from a workqueue). + */ +STATIC void +xfs_end_io_direct_write( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_ioend *ioend = private; + + trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size, + ioend ? ioend->io_type : 0, NULL); + + if (!ioend) { + ASSERT(offset + size <= i_size_read(inode)); + return; + } + + __xfs_end_io_direct_write(inode, ioend, offset, size); +} + +/* + * For DAX we need a mapping buffer callback for unwritten extent conversion + * when page faults allocate blocks and then zero them. Note that in this + * case the mapping indicated by the ioend may extend beyond EOF. We most + * definitely do not want to extend EOF here, so we trim back the ioend size to + * EOF. + */ +#ifdef CONFIG_FS_DAX +void +xfs_end_io_dax_write( + struct buffer_head *bh, + int uptodate) +{ + struct xfs_ioend *ioend = bh->b_private; + struct inode *inode = ioend->io_inode; + ssize_t size = ioend->io_size; + + ASSERT(IS_DAX(ioend->io_inode)); + + /* if there was an error zeroing, then don't convert it */ + if (!uptodate) + ioend->io_error = -EIO; + + /* + * Trim update to EOF, so we don't extend EOF during unwritten extent + * conversion of partial EOF blocks. + */ + spin_lock(&XFS_I(inode)->i_flags_lock); + if (ioend->io_offset + size > i_size_read(inode)) + size = i_size_read(inode) - ioend->io_offset; + spin_unlock(&XFS_I(inode)->i_flags_lock); + + __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size); + +} +#else +void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { } +#endif + +static inline ssize_t +xfs_vm_do_dio( + struct inode *inode, + struct kiocb *iocb, + struct iov_iter *iter, + loff_t offset, + void (*endio)(struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private), + int flags) +{ + struct block_device *bdev; + + if (IS_DAX(inode)) + return dax_do_io(iocb, inode, iter, offset, + xfs_get_blocks_direct, endio, 0); + + bdev = xfs_find_bdev_for_inode(inode); + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, endio, NULL, flags); +} + STATIC ssize_t xfs_vm_direct_IO( struct kiocb *iocb, @@ -1613,16 +1686,11 @@ xfs_vm_direct_IO( loff_t offset) { struct inode *inode = iocb->ki_filp->f_mapping->host; - struct block_device *bdev = xfs_find_bdev_for_inode(inode); - if (iov_iter_rw(iter) == WRITE) { - return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, - xfs_get_blocks_direct, - xfs_end_io_direct_write, NULL, - DIO_ASYNC_EXTEND); - } - return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, - xfs_get_blocks_direct, NULL, NULL, 0); + if (iov_iter_rw(iter) == WRITE) + return xfs_vm_do_dio(inode, iocb, iter, offset, + xfs_end_io_direct_write, DIO_ASYNC_EXTEND); + return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0); } /* diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index ac644e0137a4..86afd1ac7895 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -53,7 +53,12 @@ typedef struct xfs_ioend { } xfs_ioend_t; extern const struct address_space_operations xfs_address_space_operations; -extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); + +int xfs_get_blocks(struct inode *inode, sector_t offset, + struct buffer_head *map_bh, int create); +int xfs_get_blocks_direct(struct inode *inode, sector_t offset, + struct buffer_head *map_bh, int create); +void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); extern void xfs_count_page_state(struct page *, int *, int *); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 3fbf167cfb4c..2bb959ada45b 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -394,7 +394,6 @@ xfs_attr_inactive( { struct xfs_trans *trans; struct xfs_mount *mp; - int cancel_flags = 0; int lock_mode = XFS_ILOCK_SHARED; int error = 0; @@ -423,7 +422,6 @@ xfs_attr_inactive( goto out_cancel; lock_mode = XFS_ILOCK_EXCL; - cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT; xfs_ilock(dp, lock_mode); if (!XFS_IFORK_Q(dp)) @@ -435,8 +433,14 @@ xfs_attr_inactive( */ xfs_trans_ijoin(trans, dp, 0); - /* invalidate and truncate the attribute fork extents */ - if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + /* + * Invalidate and truncate the attribute fork extents. Make sure the + * fork actually has attributes as otherwise the invalidation has no + * blocks to read and returns an error. In this case, just do the fork + * removal below. + */ + if (xfs_inode_hasattr(dp) && + dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_cancel; @@ -449,12 +453,12 @@ xfs_attr_inactive( /* Reset the attribute fork - this also destroys the in-core fork */ xfs_attr_fork_remove(dp, trans); - error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(trans); xfs_iunlock(dp, lock_mode); return error; out_cancel: - xfs_trans_cancel(trans, cancel_flags); + xfs_trans_cancel(trans); out_destroy_fork: /* kill the in-core attr fork before we drop the inode lock */ if (dp->i_afp) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index a52bbd3abc7d..0f34886cf726 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -75,28 +75,20 @@ xfs_bmap_finish( xfs_efi_log_item_t *efi; /* extent free intention */ int error; /* error return value */ xfs_bmap_free_item_t *free; /* free extent item */ - struct xfs_trans_res tres; /* new log reservation */ xfs_mount_t *mp; /* filesystem mount structure */ xfs_bmap_free_item_t *next; /* next item on free list */ - xfs_trans_t *ntp; /* new transaction pointer */ ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); if (flist->xbf_count == 0) { *committed = 0; return 0; } - ntp = *tp; - efi = xfs_trans_get_efi(ntp, flist->xbf_count); + efi = xfs_trans_get_efi(*tp, flist->xbf_count); for (free = flist->xbf_first; free; free = free->xbfi_next) - xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock, + xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock, free->xbfi_blockcount); - tres.tr_logres = ntp->t_log_res; - tres.tr_logcount = ntp->t_log_count; - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - ntp = xfs_trans_dup(*tp); - error = xfs_trans_commit(*tp, 0); - *tp = ntp; + error = xfs_trans_roll(tp, NULL); *committed = 1; /* * We have a new transaction, so we should return committed=1, @@ -105,19 +97,10 @@ xfs_bmap_finish( if (error) return error; - /* - * transaction commit worked ok so we can drop the extra ticket - * reference that we gained in xfs_trans_dup() - */ - xfs_log_ticket_put(ntp->t_ticket); - - error = xfs_trans_reserve(ntp, &tres, 0, 0); - if (error) - return error; - efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count); + efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count); for (free = flist->xbf_first; free != NULL; free = next) { next = free->xbfi_next; - if ((error = xfs_free_extent(ntp, free->xbfi_startblock, + if ((error = xfs_free_extent(*tp, free->xbfi_startblock, free->xbfi_blockcount))) { /* * The bmap free list will be cleaned up at a @@ -127,7 +110,7 @@ xfs_bmap_finish( * happens, since this transaction may not be * dirty yet. */ - mp = ntp->t_mountp; + mp = (*tp)->t_mountp; if (!XFS_FORCED_SHUTDOWN(mp)) xfs_force_shutdown(mp, (error == -EFSCORRUPTED) ? @@ -135,7 +118,7 @@ xfs_bmap_finish( SHUTDOWN_META_IO_ERROR); return error; } - xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock, + xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock, free->xbfi_blockcount); xfs_bmap_del_free(flist, NULL, free); } @@ -878,7 +861,7 @@ xfs_free_eofblocks( if (need_iolock) { if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return -EAGAIN; } } @@ -886,7 +869,7 @@ xfs_free_eofblocks( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); if (need_iolock) xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; @@ -908,12 +891,9 @@ xfs_free_eofblocks( * If we get an error at this point we simply don't * bother truncating the file. */ - xfs_trans_cancel(tp, - (XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT)); + xfs_trans_cancel(tp); } else { - error = xfs_trans_commit(tp, - XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (!error) xfs_inode_clear_eofblocks_tag(ip); } @@ -1026,7 +1006,7 @@ xfs_alloc_file_space( * Free the transaction structure. */ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1053,7 +1033,7 @@ xfs_alloc_file_space( goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { break; @@ -1077,7 +1057,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); error1: /* Just cancel transaction */ - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -1133,14 +1113,29 @@ xfs_zero_remaining_bytes( break; ASSERT(imap.br_blockcount >= 1); ASSERT(imap.br_startoff == offset_fsb); + ASSERT(imap.br_startblock != DELAYSTARTBLOCK); + + if (imap.br_startblock == HOLESTARTBLOCK || + imap.br_state == XFS_EXT_UNWRITTEN) { + /* skip the entire extent */ + lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + + imap.br_blockcount) - 1; + continue; + } + lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; if (lastoffset > endoff) lastoffset = endoff; - if (imap.br_startblock == HOLESTARTBLOCK) - continue; - ASSERT(imap.br_startblock != DELAYSTARTBLOCK); - if (imap.br_state == XFS_EXT_UNWRITTEN) + + /* DAX can just zero the backing device directly */ + if (IS_DAX(VFS_I(ip))) { + error = dax_zero_page_range(VFS_I(ip), offset, + lastoffset - offset + 1, + xfs_get_blocks_direct); + if (error) + return error; continue; + } error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp, @@ -1289,7 +1284,7 @@ xfs_free_file_space( * Free the transaction structure. */ ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1320,7 +1315,7 @@ xfs_free_file_space( goto error0; } - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); } @@ -1330,7 +1325,7 @@ xfs_free_file_space( error0: xfs_bmap_cancel(&free_list); error1: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); goto out; } @@ -1462,7 +1457,7 @@ xfs_shift_file_space( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); break; } @@ -1492,13 +1487,13 @@ xfs_shift_file_space( if (error) goto out; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); } return error; out: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } @@ -1718,7 +1713,7 @@ xfs_swap_extents( tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); goto out_unlock; } @@ -1901,7 +1896,7 @@ xfs_swap_extents( if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); @@ -1915,6 +1910,6 @@ out_unlock: goto out; out_trans_cancel: - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); goto out; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1790b00bea7a..a4b7d92e946c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1419,9 +1419,9 @@ xfs_buf_submit_wait( return error; } -xfs_caddr_t +void * xfs_buf_offset( - xfs_buf_t *bp, + struct xfs_buf *bp, size_t offset) { struct page *page; @@ -1431,7 +1431,7 @@ xfs_buf_offset( offset += bp->b_offset; page = bp->b_pages[offset >> PAGE_SHIFT]; - return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); + return page_address(page) + (offset & (PAGE_SIZE-1)); } /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 75ff5d5a7d2e..331c1ccf8264 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -299,7 +299,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) /* Buffer Utility Routines */ -extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); +extern void *xfs_buf_offset(struct xfs_buf *, size_t); /* Delayed Write Buffer Routines */ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 02c01bbbc789..4143dc75dca4 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -568,8 +568,6 @@ xfs_qm_dqread( struct xfs_buf *bp; struct xfs_trans *tp = NULL; int error; - int cancelflags = 0; - dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); @@ -617,7 +615,6 @@ xfs_qm_dqread( XFS_QM_DQALLOC_SPACE_RES(mp), 0); if (error) goto error1; - cancelflags = XFS_TRANS_RELEASE_LOG_RES; } /* @@ -632,7 +629,6 @@ xfs_qm_dqread( * allocate (ENOENT). */ trace_xfs_dqread_fail(dqp); - cancelflags |= XFS_TRANS_ABORT; goto error1; } @@ -670,7 +666,7 @@ xfs_qm_dqread( xfs_trans_brelse(tp, bp); if (tp) { - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto error0; } @@ -680,7 +676,7 @@ xfs_qm_dqread( error1: if (tp) - xfs_trans_cancel(tp, cancelflags); + xfs_trans_cancel(tp); error0: xfs_qm_dqdestroy(dqp); *O_dqpp = NULL; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 338e50bbfd1e..74d0e5966ebc 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -127,7 +127,7 @@ xfs_error_report( struct xfs_mount *mp, const char *filename, int linenum, - inst_t *ra) + void *ra) { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, @@ -146,7 +146,7 @@ xfs_corruption_error( void *p, const char *filename, int linenum, - inst_t *ra) + void *ra) { if (level <= xfs_error_level) xfs_hex_dump(p, 64); diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index c0394ed126fc..4ed3042a0f16 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -21,10 +21,10 @@ struct xfs_mount; extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, - const char *filename, int linenum, inst_t *ra); + const char *filename, int linenum, void *ra); extern void xfs_corruption_error(const char *tag, int level, struct xfs_mount *mp, void *p, const char *filename, - int linenum, inst_t *ra); + int linenum, void *ra); extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_ERROR_REPORT(e, lvl, mp) \ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index cb7fe64cdbfa..adc8f8fdd145 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -239,7 +239,7 @@ xfs_efi_init( xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); efip->efi_format.efi_nextents = nextents; - efip->efi_format.efi_id = (__psint_t)(void*)efip; + efip->efi_format.efi_id = (uintptr_t)(void *)efip; atomic_set(&efip->efi_next_extent, 0); atomic_set(&efip->efi_refcount, 2); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7c62fca53e2f..874507de3485 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -80,14 +80,15 @@ xfs_rw_ilock_demote( } /* - * xfs_iozero + * xfs_iozero clears the specified range supplied via the page cache (except in + * the DAX case). Writes through the page cache will allocate blocks over holes, + * though the callers usually map the holes first and avoid them. If a block is + * not completely zeroed, then it will be read from disk before being partially + * zeroed. * - * xfs_iozero clears the specified range of buffer supplied, - * and marks all the affected blocks as valid and modified. If - * an affected block is not allocated, it will be allocated. If - * an affected block is not completely overwritten, and is not - * valid before the operation, it will be read from disk before - * being partially zeroed. + * In the DAX case, we can just directly write to the underlying pages. This + * will not allocate blocks, but will avoid holes and unwritten extents and so + * not do unnecessary work. */ int xfs_iozero( @@ -97,7 +98,8 @@ xfs_iozero( { struct page *page; struct address_space *mapping; - int status; + int status = 0; + mapping = VFS_I(ip)->i_mapping; do { @@ -109,20 +111,27 @@ xfs_iozero( if (bytes > count) bytes = count; - status = pagecache_write_begin(NULL, mapping, pos, bytes, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); - if (status) - break; + if (IS_DAX(VFS_I(ip))) { + status = dax_zero_page_range(VFS_I(ip), pos, bytes, + xfs_get_blocks_direct); + if (status) + break; + } else { + status = pagecache_write_begin(NULL, mapping, pos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (status) + break; - zero_user(page, offset, bytes); + zero_user(page, offset, bytes); - status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, - page, fsdata); - WARN_ON(status <= 0); /* can't return less than zero! */ + status = pagecache_write_end(NULL, mapping, pos, bytes, + bytes, page, fsdata); + WARN_ON(status <= 0); /* can't return less than zero! */ + status = 0; + } pos += bytes; count -= bytes; - status = 0; } while (count); return status; @@ -139,7 +148,7 @@ xfs_update_prealloc_flags( tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -161,7 +170,7 @@ xfs_update_prealloc_flags( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (flags & XFS_PREALLOC_SYNC) xfs_trans_set_sync(tp); - return xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp); } /* @@ -285,7 +294,7 @@ xfs_file_read_iter( if (file->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; - if (unlikely(ioflags & XFS_IO_ISDIRECT)) { + if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -379,7 +388,11 @@ xfs_file_splice_read( trace_xfs_file_splice_read(ip, count, *ppos, ioflags); - ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); + /* for dax, we need to avoid the page cache */ + if (IS_DAX(VFS_I(ip))) + ret = default_file_splice_read(infilp, ppos, pipe, count, flags); + else + ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -673,7 +686,7 @@ xfs_file_dio_aio_write( mp->m_rtdev_targp : mp->m_ddev_targp; /* DIO must be aligned to device logical sector size */ - if ((pos | count) & target->bt_logical_sectormask) + if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask)) return -EINVAL; /* "unaligned" here means not aligned to a filesystem block */ @@ -759,8 +772,11 @@ xfs_file_dio_aio_write( out: xfs_rw_iunlock(ip, iolock); - /* No fallback to buffered IO on errors for XFS. */ - ASSERT(ret < 0 || ret == count); + /* + * No fallback to buffered IO on errors for XFS. DAX can result in + * partial writes, but direct IO will either complete fully or fail. + */ + ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip))); return ret; } @@ -843,7 +859,7 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (unlikely(iocb->ki_flags & IOCB_DIRECT)) + if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode)) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); @@ -1064,17 +1080,6 @@ xfs_file_readdir( return xfs_readdir(ip, ctx, bufsize); } -STATIC int -xfs_file_mmap( - struct file *filp, - struct vm_area_struct *vma) -{ - vma->vm_ops = &xfs_file_vm_ops; - - file_accessed(filp); - return 0; -} - /* * This type is designed to indicate the type of offset we would like * to search from page cache for xfs_seek_hole_data(). @@ -1455,48 +1460,83 @@ xfs_file_llseek( * ordering of: * * mmap_sem (MM) - * i_mmap_lock (XFS - truncate serialisation) - * page_lock (MM) - * i_lock (XFS - extent map serialisation) + * sb_start_pagefault(vfs, freeze) + * i_mmap_lock (XFS - truncate serialisation) + * page_lock (MM) + * i_lock (XFS - extent map serialisation) + */ + +/* + * mmap()d file has taken write protection fault and is being made writable. We + * can set the page state up correctly for a writable page, which means we can + * do correct delalloc accounting (ENOSPC checking!) and unwritten extent + * mapping. */ STATIC int -xfs_filemap_fault( +xfs_filemap_page_mkwrite( struct vm_area_struct *vma, struct vm_fault *vmf) { - struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); - int error; + struct inode *inode = file_inode(vma->vm_file); + int ret; - trace_xfs_filemap_fault(ip); + trace_xfs_filemap_page_mkwrite(XFS_I(inode)); - xfs_ilock(ip, XFS_MMAPLOCK_SHARED); - error = filemap_fault(vma, vmf); - xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - return error; + if (IS_DAX(inode)) { + ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, + xfs_end_io_dax_write); + } else { + ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = block_page_mkwrite_return(ret); + } + + xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + sb_end_pagefault(inode->i_sb); + + return ret; } -/* - * mmap()d file has taken write protection fault and is being made writable. We - * can set the page state up correctly for a writable page, which means we can - * do correct delalloc accounting (ENOSPC checking!) and unwritten extent - * mapping. - */ STATIC int -xfs_filemap_page_mkwrite( +xfs_filemap_fault( struct vm_area_struct *vma, struct vm_fault *vmf) { - struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); - int error; + struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file)); + int ret; + + trace_xfs_filemap_fault(ip); - trace_xfs_filemap_page_mkwrite(ip); + /* DAX can shortcut the normal fault path on write faults! */ + if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip))) + return xfs_filemap_page_mkwrite(vma, vmf); xfs_ilock(ip, XFS_MMAPLOCK_SHARED); - error = block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = filemap_fault(vma, vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); - return error; + return ret; +} + +static const struct vm_operations_struct xfs_file_vm_ops = { + .fault = xfs_filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = xfs_filemap_page_mkwrite, +}; + +STATIC int +xfs_file_mmap( + struct file *filp, + struct vm_area_struct *vma) +{ + file_accessed(filp); + vma->vm_ops = &xfs_file_vm_ops; + if (IS_DAX(file_inode(filp))) + vma->vm_flags |= VM_MIXEDMAP; + return 0; } const struct file_operations xfs_file_operations = { @@ -1527,9 +1567,3 @@ const struct file_operations xfs_dir_file_operations = { #endif .fsync = xfs_dir_fsync, }; - -static const struct vm_operations_struct xfs_file_vm_ops = { - .fault = xfs_filemap_fault, - .map_pages = filemap_map_pages, - .page_mkwrite = xfs_filemap_page_mkwrite, -}; diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index da82f1cb4b9b..c4c130f9bfb6 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -196,7 +196,8 @@ xfs_filestream_pick_ag( goto next_ag; } - longest = xfs_alloc_longest_free_extent(mp, pag); + longest = xfs_alloc_longest_free_extent(mp, pag, + xfs_alloc_min_freelist(mp, pag)); if (((minlen && longest >= minlen) || (!minlen && pag->pagf_freeblks >= minfree)) && (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index cb7e8a29dfb6..9b3438a7680f 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -101,7 +101,9 @@ xfs_fs_geometry( (xfs_sb_version_hasftype(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | (xfs_sb_version_hasfinobt(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FINOBT : 0); + XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | + (xfs_sb_version_hassparseinodes(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_SPINODES : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; @@ -201,7 +203,7 @@ xfs_growfs_data_private( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, XFS_GROWFS_SPACE_RES(mp), 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -489,7 +491,7 @@ xfs_growfs_data_private( if (dpct) xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); if (error) return error; @@ -557,7 +559,7 @@ xfs_growfs_data_private( return saved_error ? saved_error : error; error0: - xfs_trans_cancel(tp, XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 539a85fddbc2..3da9f4da4f3d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -905,7 +905,6 @@ xfs_dir_ialloc( { xfs_trans_t *tp; - xfs_trans_t *ntp; xfs_inode_t *ip; xfs_buf_t *ialloc_context = NULL; int code; @@ -954,8 +953,6 @@ xfs_dir_ialloc( * to succeed the second time. */ if (ialloc_context) { - struct xfs_trans_res tres; - /* * Normally, xfs_trans_commit releases all the locks. * We call bhold to hang on to the ialloc_context across @@ -964,12 +961,6 @@ xfs_dir_ialloc( * allocation group. */ xfs_trans_bhold(tp, ialloc_context); - /* - * Save the log reservation so we can use - * them in the next transaction. - */ - tres.tr_logres = xfs_trans_get_log_res(tp); - tres.tr_logcount = xfs_trans_get_log_count(tp); /* * We want the quota changes to be associated with the next @@ -985,35 +976,9 @@ xfs_dir_ialloc( tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); } - ntp = xfs_trans_dup(tp); - code = xfs_trans_commit(tp, 0); - tp = ntp; - if (committed != NULL) { + code = xfs_trans_roll(&tp, 0); + if (committed != NULL) *committed = 1; - } - /* - * If we get an error during the commit processing, - * release the buffer that is still held and return - * to the caller. - */ - if (code) { - xfs_buf_relse(ialloc_context); - if (dqinfo) { - tp->t_dqinfo = dqinfo; - xfs_trans_free_dqinfo(tp); - } - *tpp = ntp; - *ipp = NULL; - return code; - } - - /* - * transaction commit worked ok so we can drop the extra ticket - * reference that we gained in xfs_trans_dup() - */ - xfs_log_ticket_put(tp->t_ticket); - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - code = xfs_trans_reserve(tp, &tres, 0, 0); /* * Re-attach the quota info that we detached from prev trx. @@ -1025,7 +990,7 @@ xfs_dir_ialloc( if (code) { xfs_buf_relse(ialloc_context); - *tpp = ntp; + *tpp = tp; *ipp = NULL; return code; } @@ -1127,7 +1092,6 @@ xfs_create( xfs_bmap_free_t free_list; xfs_fsblock_t first_block; bool unlock_dp_on_error = false; - uint cancel_flags; int committed; prid_t prid; struct xfs_dquot *udqp = NULL; @@ -1164,8 +1128,6 @@ xfs_create( tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); } - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; - /* * Initially assume that the file does not exist and * reserve the resources for that case. If that is not @@ -1183,10 +1145,9 @@ xfs_create( resblks = 0; error = xfs_trans_reserve(tp, tres, 0, 0); } - if (error) { - cancel_flags = 0; + if (error) goto out_trans_cancel; - } + xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = true; @@ -1217,7 +1178,7 @@ xfs_create( if (error) { if (error == -ENOSPC) goto out_trans_cancel; - goto out_trans_abort; + goto out_trans_cancel; } /* @@ -1235,7 +1196,7 @@ xfs_create( resblks - XFS_IALLOC_SPACE_RES(mp) : 0); if (error) { ASSERT(error != -ENOSPC); - goto out_trans_abort; + goto out_trans_cancel; } xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -1269,7 +1230,7 @@ xfs_create( if (error) goto out_bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto out_release_inode; @@ -1282,10 +1243,8 @@ xfs_create( out_bmap_cancel: xfs_bmap_cancel(&free_list); - out_trans_abort: - cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); out_release_inode: /* * Wait until after the current transaction is aborted to finish the @@ -1317,7 +1276,6 @@ xfs_create_tmpfile( struct xfs_inode *ip = NULL; struct xfs_trans *tp = NULL; int error; - uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES; prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; @@ -1350,10 +1308,8 @@ xfs_create_tmpfile( resblks = 0; error = xfs_trans_reserve(tp, tres, 0, 0); } - if (error) { - cancel_flags = 0; + if (error) goto out_trans_cancel; - } error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, pdqp, resblks, 1, 0); @@ -1365,7 +1321,7 @@ xfs_create_tmpfile( if (error) { if (error == -ENOSPC) goto out_trans_cancel; - goto out_trans_abort; + goto out_trans_cancel; } if (mp->m_flags & XFS_MOUNT_WSYNC) @@ -1381,9 +1337,9 @@ xfs_create_tmpfile( ip->i_d.di_nlink--; error = xfs_iunlink(tp, ip); if (error) - goto out_trans_abort; + goto out_trans_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto out_release_inode; @@ -1394,10 +1350,8 @@ xfs_create_tmpfile( *ipp = ip; return 0; - out_trans_abort: - cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); out_release_inode: /* * Wait until after the current transaction is aborted to finish the @@ -1427,7 +1381,6 @@ xfs_link( int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - int cancel_flags; int committed; int resblks; @@ -1447,17 +1400,14 @@ xfs_link( goto std_return; tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; resblks = XFS_LINK_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); if (error == -ENOSPC) { resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); } - if (error) { - cancel_flags = 0; + if (error) goto error_return; - } xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); @@ -1486,19 +1436,19 @@ xfs_link( if (sip->i_d.di_nlink == 0) { error = xfs_iunlink_remove(tp, sip); if (error) - goto abort_return; + goto error_return; } error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, &first_block, &free_list, resblks); if (error) - goto abort_return; + goto error_return; xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); if (error) - goto abort_return; + goto error_return; /* * If this is a synchronous mount, make sure that the @@ -1512,15 +1462,13 @@ xfs_link( error = xfs_bmap_finish (&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); - goto abort_return; + goto error_return; } - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + return xfs_trans_commit(tp); - abort_return: - cancel_flags |= XFS_TRANS_ABORT; error_return: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); std_return: return error; } @@ -1555,7 +1503,6 @@ xfs_itruncate_extents( { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp = *tpp; - struct xfs_trans *ntp; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; xfs_fileoff_t first_unmap_block; @@ -1613,29 +1560,7 @@ xfs_itruncate_extents( if (error) goto out_bmap_cancel; - if (committed) { - /* - * Mark the inode dirty so it will be logged and - * moved forward in the log as part of every commit. - */ - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - } - - ntp = xfs_trans_dup(tp); - error = xfs_trans_commit(tp, 0); - tp = ntp; - - xfs_trans_ijoin(tp, ip, 0); - - if (error) - goto out; - - /* - * Transaction commit worked ok so we can drop the extra ticket - * reference that we gained in xfs_trans_dup() - */ - xfs_log_ticket_put(tp->t_ticket); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); + error = xfs_trans_roll(&tp, ip); if (error) goto out; } @@ -1756,7 +1681,7 @@ xfs_inactive_truncate( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -1777,7 +1702,7 @@ xfs_inactive_truncate( ASSERT(ip->i_d.di_nextents == 0); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto error_unlock; @@ -1785,7 +1710,7 @@ xfs_inactive_truncate( return 0; error_trans_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); error_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -1835,7 +1760,7 @@ xfs_inactive_ifree( } else { ASSERT(XFS_FORCED_SHUTDOWN(mp)); } - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_trans_cancel(tp); return error; } @@ -1855,7 +1780,7 @@ xfs_inactive_ifree( __func__, error); xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -1874,7 +1799,7 @@ xfs_inactive_ifree( if (error) xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", __func__, error); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) xfs_notice(mp, "%s: xfs_trans_commit returned error %d", __func__, error); @@ -2235,28 +2160,42 @@ xfs_iunlink_remove( */ STATIC int xfs_ifree_cluster( - xfs_inode_t *free_ip, - xfs_trans_t *tp, - xfs_ino_t inum) + xfs_inode_t *free_ip, + xfs_trans_t *tp, + struct xfs_icluster *xic) { xfs_mount_t *mp = free_ip->i_mount; int blks_per_cluster; int inodes_per_cluster; int nbufs; int i, j; + int ioffset; xfs_daddr_t blkno; xfs_buf_t *bp; xfs_inode_t *ip; xfs_inode_log_item_t *iip; xfs_log_item_t *lip; struct xfs_perag *pag; + xfs_ino_t inum; + inum = xic->first_ino; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); blks_per_cluster = xfs_icluster_size_fsb(mp); inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; nbufs = mp->m_ialloc_blks / blks_per_cluster; for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { + /* + * The allocation bitmap tells us which inodes of the chunk were + * physically allocated. Skip the cluster if an inode falls into + * a sparse region. + */ + ioffset = inum - xic->first_ino; + if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { + ASSERT(do_mod(ioffset, inodes_per_cluster) == 0); + continue; + } + blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); @@ -2414,8 +2353,7 @@ xfs_ifree( xfs_bmap_free_t *flist) { int error; - int delete; - xfs_ino_t first_ino; + struct xfs_icluster xic = { 0 }; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(ip->i_d.di_nlink == 0); @@ -2431,7 +2369,7 @@ xfs_ifree( if (error) return error; - error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); + error = xfs_difree(tp, ip->i_ino, flist, &xic); if (error) return error; @@ -2448,8 +2386,8 @@ xfs_ifree( ip->i_d.di_gen++; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (delete) - error = xfs_ifree_cluster(ip, tp, first_ino); + if (xic.deleted) + error = xfs_ifree_cluster(ip, tp, &xic); return error; } @@ -2536,7 +2474,6 @@ xfs_remove( int error = 0; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; - int cancel_flags; int committed; uint resblks; @@ -2557,7 +2494,6 @@ xfs_remove( tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); else tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; /* * We try to get the real space reservation first, @@ -2576,7 +2512,6 @@ xfs_remove( } if (error) { ASSERT(error != -ENOSPC); - cancel_flags = 0; goto out_trans_cancel; } @@ -2588,7 +2523,6 @@ xfs_remove( /* * If we're removing a directory perform some additional validation. */ - cancel_flags |= XFS_TRANS_ABORT; if (is_dir) { ASSERT(ip->i_d.di_nlink >= 2); if (ip->i_d.di_nlink != 2) { @@ -2644,7 +2578,7 @@ xfs_remove( if (error) goto out_bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto std_return; @@ -2656,7 +2590,7 @@ xfs_remove( out_bmap_cancel: xfs_bmap_cancel(&free_list); out_trans_cancel: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); std_return: return error; } @@ -2730,11 +2664,11 @@ xfs_finish_rename( error = xfs_bmap_finish(&tp, free_list, &committed); if (error) { xfs_bmap_cancel(free_list); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + return xfs_trans_commit(tp); } /* @@ -2855,7 +2789,7 @@ xfs_cross_rename( out_trans_abort: xfs_bmap_cancel(free_list); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } @@ -2915,7 +2849,6 @@ xfs_rename( int num_inodes = __XFS_SORT_INODES; bool new_parent = (src_dp != target_dp); bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode); - int cancel_flags = 0; int spaceres; int error; @@ -2951,7 +2884,6 @@ xfs_rename( } if (error) goto out_trans_cancel; - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; /* * Attach the dquots to the inodes @@ -3022,10 +2954,8 @@ xfs_rename( error = xfs_dir_createname(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); - if (error == -ENOSPC) - goto out_bmap_cancel; if (error) - goto out_trans_abort; + goto out_bmap_cancel; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3033,7 +2963,7 @@ xfs_rename( if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) - goto out_trans_abort; + goto out_bmap_cancel; } } else { /* target_ip != NULL */ /* @@ -3065,7 +2995,7 @@ xfs_rename( src_ip->i_ino, &first_block, &free_list, spaceres); if (error) - goto out_trans_abort; + goto out_bmap_cancel; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -3076,7 +3006,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto out_trans_abort; + goto out_bmap_cancel; if (src_is_directory) { /* @@ -3084,7 +3014,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto out_trans_abort; + goto out_bmap_cancel; } } /* target_ip != NULL */ @@ -3101,7 +3031,7 @@ xfs_rename( &first_block, &free_list, spaceres); ASSERT(error != -EEXIST); if (error) - goto out_trans_abort; + goto out_bmap_cancel; } /* @@ -3127,7 +3057,7 @@ xfs_rename( */ error = xfs_droplink(tp, src_dp); if (error) - goto out_trans_abort; + goto out_bmap_cancel; } /* @@ -3142,7 +3072,7 @@ xfs_rename( error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) - goto out_trans_abort; + goto out_bmap_cancel; /* * For whiteouts, we need to bump the link count on the whiteout inode. @@ -3156,10 +3086,10 @@ xfs_rename( ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); error = xfs_bumplink(tp, wip); if (error) - goto out_trans_abort; + goto out_bmap_cancel; error = xfs_iunlink_remove(tp, wip); if (error) - goto out_trans_abort; + goto out_bmap_cancel; xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); /* @@ -3180,12 +3110,10 @@ xfs_rename( IRELE(wip); return error; -out_trans_abort: - cancel_flags |= XFS_TRANS_ABORT; out_bmap_cancel: xfs_bmap_cancel(&free_list); out_trans_cancel: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); if (wip) IRELE(wip); return error; @@ -3464,7 +3392,7 @@ xfs_iflush_int( ASSERT(ip->i_d.di_version > 1); /* set *dip = inode's place in the buffer */ - dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); + dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 87f67c6b654c..ea7d85af5310 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -336,7 +336,7 @@ xfs_set_dmattrs( tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -346,7 +346,7 @@ xfs_set_dmattrs( ip->i_d.di_dmstate = state; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); return error; } @@ -1076,7 +1076,7 @@ xfs_ioctl_setattr_get_trans( return tp; out_cancel: - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return ERR_PTR(error); } @@ -1253,7 +1253,7 @@ xfs_ioctl_setattr( else ip->i_d.di_extsize = 0; - code = xfs_trans_commit(tp, 0); + code = xfs_trans_commit(tp); /* * Release any dquot(s) the inode had kept before chown. @@ -1265,7 +1265,7 @@ xfs_ioctl_setattr( return code; error_trans_cancel: - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); error_free_dquots: xfs_qm_dqrele(udqp); xfs_qm_dqrele(pdqp); @@ -1338,11 +1338,11 @@ xfs_ioc_setxflags( error = xfs_ioctl_setattr_xflags(tp, ip, &fa); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); goto out_drop_write; } - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); out_drop_write: mnt_drop_write_file(filp); return error; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 38e633bad8c2..1f86033171c8 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -183,7 +183,7 @@ xfs_iomap_write_direct( * Check for running out of space, note: need lock to return */ if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -213,7 +213,7 @@ xfs_iomap_write_direct( error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto out_bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto out_unlock; @@ -236,7 +236,7 @@ out_bmap_cancel: xfs_bmap_cancel(&free_list); xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); out_trans_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); goto out_unlock; } @@ -690,7 +690,7 @@ xfs_iomap_write_allocate( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, nres, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -760,7 +760,7 @@ xfs_iomap_write_allocate( if (error) goto trans_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto error0; @@ -791,7 +791,7 @@ xfs_iomap_write_allocate( trans_cancel: xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -853,7 +853,7 @@ xfs_iomap_write_unwritten( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -890,7 +890,7 @@ xfs_iomap_write_unwritten( if (error) goto error_on_bmapi_transaction; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return error; @@ -914,7 +914,7 @@ xfs_iomap_write_unwritten( error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 7f51f39f8acc..766b23f86ce9 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -699,7 +699,7 @@ xfs_setattr_nonsize( if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -730,7 +730,7 @@ xfs_setattr_nonsize( return 0; out_trans_cancel: - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); @@ -752,7 +752,6 @@ xfs_setattr_size( struct xfs_trans *tp; int error; uint lock_flags = 0; - uint commit_flags = 0; bool did_zeroing = false; trace_xfs_setattr(ip); @@ -848,7 +847,11 @@ xfs_setattr_size( * to hope that the caller sees ENOMEM and retries the truncate * operation. */ - error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); + if (IS_DAX(inode)) + error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct); + else + error = block_truncate_page(inode->i_mapping, newsize, + xfs_get_blocks); if (error) return error; truncate_setsize(inode, newsize); @@ -858,7 +861,6 @@ xfs_setattr_size( if (error) goto out_trans_cancel; - commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); @@ -898,7 +900,7 @@ xfs_setattr_size( if (newsize <= oldsize) { error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); if (error) - goto out_trans_abort; + goto out_trans_cancel; /* * Truncated "down", so we're removing references to old data @@ -925,16 +927,14 @@ xfs_setattr_size( if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); return error; -out_trans_abort: - commit_flags |= XFS_TRANS_ABORT; out_trans_cancel: - xfs_trans_cancel(tp, commit_flags); + xfs_trans_cancel(tp); goto out_unlock; } @@ -981,7 +981,7 @@ xfs_vn_update_time( tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -1003,7 +1003,7 @@ xfs_vn_update_time( } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); - return xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp); } #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) @@ -1188,22 +1188,22 @@ xfs_diflags_to_iflags( struct inode *inode, struct xfs_inode *ip) { - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + uint16_t flags = ip->i_d.di_flags; + + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | + S_NOATIME | S_DAX); + + if (flags & XFS_DIFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + if (flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + if (flags & XFS_DIFLAG_SYNC) inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + if (flags & XFS_DIFLAG_NOATIME) inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; + /* XXX: Also needs an on-disk per inode flag! */ + if (ip->i_mount->m_flags & XFS_MOUNT_DAX) + inode->i_flags |= S_DAX; } /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 80429891dc9b..f41b0c3fddab 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -252,7 +252,7 @@ xfs_bulkstat_grab_ichunk( } irec->ir_free |= xfs_inobt_maskn(0, idx); - *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount; + *icount = irec->ir_count - irec->ir_freecount; } return 0; @@ -415,6 +415,8 @@ xfs_bulkstat( goto del_cursor; if (icount) { irbp->ir_startino = r.ir_startino; + irbp->ir_holemask = r.ir_holemask; + irbp->ir_count = r.ir_count; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; @@ -447,13 +449,15 @@ xfs_bulkstat( * If this chunk has any allocated inodes, save it. * Also start read-ahead now for this chunk. */ - if (r.ir_freecount < XFS_INODES_PER_CHUNK) { + if (r.ir_freecount < r.ir_count) { xfs_bulkstat_ichunk_ra(mp, agno, &r); irbp->ir_startino = r.ir_startino; + irbp->ir_holemask = r.ir_holemask; + irbp->ir_count = r.ir_count; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; - icount += XFS_INODES_PER_CHUNK - r.ir_freecount; + icount += r.ir_count - r.ir_freecount; } error = xfs_btree_increment(cur, 0, &stat); if (error || stat == 0) { @@ -599,8 +603,7 @@ xfs_inumbers( agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, r.ir_startino); - buffer[bufidx].xi_alloccount = - XFS_INODES_PER_CHUNK - r.ir_freecount; + buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount; buffer[bufidx].xi_allocmask = ~r.ir_free; if (++bufidx == bcount) { long written; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 7c7842c85a08..85f883dd6207 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -32,26 +32,12 @@ typedef unsigned int __uint32_t; typedef signed long long int __int64_t; typedef unsigned long long int __uint64_t; -typedef __uint32_t inst_t; /* an instruction */ - typedef __s64 xfs_off_t; /* <file offset> type */ typedef unsigned long long xfs_ino_t; /* <inode> type */ typedef __s64 xfs_daddr_t; /* <disk address> type */ -typedef char * xfs_caddr_t; /* <core address> type */ typedef __u32 xfs_dev_t; typedef __u32 xfs_nlink_t; -/* __psint_t is the same size as a pointer */ -#if (BITS_PER_LONG == 32) -typedef __int32_t __psint_t; -typedef __uint32_t __psunsigned_t; -#elif (BITS_PER_LONG == 64) -typedef __int64_t __psint_t; -typedef __uint64_t __psunsigned_t; -#else -#error BITS_PER_LONG must be 32 or 64 -#endif - #include "xfs_types.h" #include "kmem.h" diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index bcc7cfabb787..08d4fe46f0fa 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -109,7 +109,7 @@ xlog_ungrant_log_space( STATIC void xlog_verify_dest_ptr( struct xlog *log, - char *ptr); + void *ptr); STATIC void xlog_verify_grant_tail( struct xlog *log); @@ -513,7 +513,7 @@ xfs_log_done( struct xfs_mount *mp, struct xlog_ticket *ticket, struct xlog_in_core **iclog, - uint flags) + bool regrant) { struct xlog *log = mp->m_log; xfs_lsn_t lsn = 0; @@ -526,14 +526,11 @@ xfs_log_done( (((ticket->t_flags & XLOG_TIC_INITED) == 0) && (xlog_commit_record(log, ticket, iclog, &lsn)))) { lsn = (xfs_lsn_t) -1; - if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { - flags |= XFS_LOG_REL_PERM_RESERV; - } + regrant = false; } - if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || - (flags & XFS_LOG_REL_PERM_RESERV)) { + if (!regrant) { trace_xfs_log_done_nonperm(log, ticket); /* @@ -541,7 +538,6 @@ xfs_log_done( * request has been made to release a permanent reservation. */ xlog_ungrant_log_space(log, ticket); - xfs_log_ticket_put(ticket); } else { trace_xfs_log_done_perm(log, ticket); @@ -553,6 +549,7 @@ xfs_log_done( ticket->t_flags |= XLOG_TIC_INITED; } + xfs_log_ticket_put(ticket); return lsn; } @@ -1447,7 +1444,7 @@ xlog_alloc_log( iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; #ifdef DEBUG - log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); + log->l_iclog_bak[i] = &iclog->ic_header; #endif head = &iclog->ic_header; memset(head, 0, sizeof(xlog_rec_header_t)); @@ -1602,7 +1599,7 @@ xlog_pack_data( int i, j, k; int size = iclog->ic_offset + roundoff; __be32 cycle_lsn; - xfs_caddr_t dp; + char *dp; cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); @@ -3664,7 +3661,7 @@ xlog_ticket_alloc( void xlog_verify_dest_ptr( struct xlog *log, - char *ptr) + void *ptr) { int i; int good_ptr = 0; @@ -3767,9 +3764,8 @@ xlog_verify_iclog( xlog_op_header_t *ophead; xlog_in_core_t *icptr; xlog_in_core_2_t *xhdr; - xfs_caddr_t ptr; - xfs_caddr_t base_ptr; - __psint_t field_offset; + void *base_ptr, *ptr, *p; + ptrdiff_t field_offset; __uint8_t clientid; int len, i, j, k, op_len; int idx; @@ -3788,9 +3784,9 @@ xlog_verify_iclog( if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); - ptr = (xfs_caddr_t) &iclog->ic_header; - for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; - ptr += BBSIZE) { + base_ptr = ptr = &iclog->ic_header; + p = &iclog->ic_header; + for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) { if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) xfs_emerg(log->l_mp, "%s: unexpected magic num", __func__); @@ -3798,20 +3794,19 @@ xlog_verify_iclog( /* check fields */ len = be32_to_cpu(iclog->ic_header.h_num_logops); - ptr = iclog->ic_datap; - base_ptr = ptr; - ophead = (xlog_op_header_t *)ptr; + base_ptr = ptr = iclog->ic_datap; + ophead = ptr; xhdr = iclog->ic_data; for (i = 0; i < len; i++) { - ophead = (xlog_op_header_t *)ptr; + ophead = ptr; /* clientid is only 1 byte */ - field_offset = (__psint_t) - ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); + p = &ophead->oh_clientid; + field_offset = p - base_ptr; if (!syncing || (field_offset & 0x1ff)) { clientid = ophead->oh_clientid; } else { - idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); + idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3829,13 +3824,13 @@ xlog_verify_iclog( (unsigned long)field_offset); /* check length */ - field_offset = (__psint_t) - ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); + p = &ophead->oh_len; + field_offset = p - base_ptr; if (!syncing || (field_offset & 0x1ff)) { op_len = be32_to_cpu(ophead->oh_len); } else { - idx = BTOBBT((__psint_t)&ophead->oh_len - - (__psint_t)iclog->ic_datap); + idx = BTOBBT((uintptr_t)&ophead->oh_len - + (uintptr_t)iclog->ic_datap); if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 84e0deb95abd..fa27aaec72cb 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -111,15 +111,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XFS_LSN_CMP(x,y) _lsn_cmp(x,y) /* - * Macros, structures, prototypes for interface to the log manager. - */ - -/* - * Flags to xfs_log_done() - */ -#define XFS_LOG_REL_PERM_RESERV 0x1 - -/* * Flags to xfs_log_force() * * XFS_LOG_SYNC: Synchronous force in-core log to disk @@ -138,7 +129,7 @@ struct xfs_log_callback; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, struct xlog_in_core **iclog, - uint flags); + bool regrant); int _xfs_log_force(struct xfs_mount *mp, uint flags, int *log_forced); @@ -183,7 +174,7 @@ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket); void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_lsn_t *commit_lsn, int flags); + xfs_lsn_t *commit_lsn, bool regrant); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 45cc0ce18adf..abc2ccbff739 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -624,7 +624,7 @@ restart: spin_unlock(&cil->xc_push_lock); /* xfs_log_done always frees the ticket on error. */ - commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); + commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false); if (commit_lsn == -1) goto out_abort; @@ -773,14 +773,10 @@ xfs_log_commit_cil( struct xfs_mount *mp, struct xfs_trans *tp, xfs_lsn_t *commit_lsn, - int flags) + bool regrant) { struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; - int log_flags = 0; - - if (flags & XFS_TRANS_RELEASE_LOG_RES) - log_flags = XFS_LOG_REL_PERM_RESERV; /* lock out background commit */ down_read(&cil->xc_ctx_lock); @@ -795,7 +791,7 @@ xfs_log_commit_cil( if (commit_lsn) *commit_lsn = tp->t_commit_lsn; - xfs_log_done(mp, tp->t_ticket, NULL, log_flags); + xfs_log_done(mp, tp->t_ticket, NULL, regrant); xfs_trans_unreserve_and_mod_sb(tp); /* @@ -809,7 +805,7 @@ xfs_log_commit_cil( * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ - xfs_trans_free_items(tp, tp->t_commit_lsn, 0); + xfs_trans_free_items(tp, tp->t_commit_lsn, false); xlog_cil_push_background(log); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index db7cbdeb2b42..1c87c8abfbed 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -409,7 +409,7 @@ struct xlog { /* The following field are used for debugging; need to hold icloglock */ #ifdef DEBUG - char *l_iclog_bak[XLOG_MAX_ICLOGS]; + void *l_iclog_bak[XLOG_MAX_ICLOGS]; #endif }; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4f5784f85a5b..01dd228ca05e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -147,7 +147,7 @@ xlog_put_bp( * Return the address of the start of the given block number's data * in a log buffer. The buffer covers a log sector-aligned region. */ -STATIC xfs_caddr_t +STATIC char * xlog_align( struct xlog *log, xfs_daddr_t blk_no, @@ -203,7 +203,7 @@ xlog_bread( xfs_daddr_t blk_no, int nbblks, struct xfs_buf *bp, - xfs_caddr_t *offset) + char **offset) { int error; @@ -225,9 +225,9 @@ xlog_bread_offset( xfs_daddr_t blk_no, /* block to read from */ int nbblks, /* blocks to read */ struct xfs_buf *bp, - xfs_caddr_t offset) + char *offset) { - xfs_caddr_t orig_offset = bp->b_addr; + char *orig_offset = bp->b_addr; int orig_len = BBTOB(bp->b_length); int error, error2; @@ -396,7 +396,7 @@ xlog_find_cycle_start( xfs_daddr_t *last_blk, uint cycle) { - xfs_caddr_t offset; + char *offset; xfs_daddr_t mid_blk; xfs_daddr_t end_blk; uint mid_cycle; @@ -443,7 +443,7 @@ xlog_find_verify_cycle( uint cycle; xfs_buf_t *bp; xfs_daddr_t bufblks; - xfs_caddr_t buf = NULL; + char *buf = NULL; int error = 0; /* @@ -509,7 +509,7 @@ xlog_find_verify_log_record( { xfs_daddr_t i; xfs_buf_t *bp; - xfs_caddr_t offset = NULL; + char *offset = NULL; xlog_rec_header_t *head = NULL; int error = 0; int smallmem = 0; @@ -616,7 +616,7 @@ xlog_find_head( xfs_daddr_t *return_head_blk) { xfs_buf_t *bp; - xfs_caddr_t offset; + char *offset; xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; int num_scan_bblks; uint first_half_cycle, last_half_cycle; @@ -891,7 +891,7 @@ xlog_find_tail( { xlog_rec_header_t *rhead; xlog_op_header_t *op_head; - xfs_caddr_t offset = NULL; + char *offset = NULL; xfs_buf_t *bp; int error, i, found; xfs_daddr_t umount_data_blk; @@ -1099,7 +1099,7 @@ xlog_find_zeroed( xfs_daddr_t *blk_no) { xfs_buf_t *bp; - xfs_caddr_t offset; + char *offset; uint first_cycle, last_cycle; xfs_daddr_t new_blk, last_blk, start_blk; xfs_daddr_t num_scan_bblks; @@ -1199,7 +1199,7 @@ bp_err: STATIC void xlog_add_record( struct xlog *log, - xfs_caddr_t buf, + char *buf, int cycle, int block, int tail_cycle, @@ -1227,7 +1227,7 @@ xlog_write_log_records( int tail_cycle, int tail_block) { - xfs_caddr_t offset; + char *offset; xfs_buf_t *bp; int balign, ealign; int sectbb = log->l_sectBBsize; @@ -1789,8 +1789,7 @@ xlog_recover_do_inode_buffer( return -EFSCORRUPTED; } - buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, - next_unlinked_offset); + buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset); *buffer_nextp = *logged_nextp; /* @@ -1798,7 +1797,7 @@ xlog_recover_do_inode_buffer( * have to leave the inode in a consistent state for whoever * reads it next.... */ - xfs_dinode_calc_crc(mp, (struct xfs_dinode *) + xfs_dinode_calc_crc(mp, xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); } @@ -2503,8 +2502,8 @@ xlog_recover_inode_pass2( xfs_buf_t *bp; xfs_dinode_t *dip; int len; - xfs_caddr_t src; - xfs_caddr_t dest; + char *src; + char *dest; int error; int attr_index; uint fields; @@ -2546,7 +2545,7 @@ xlog_recover_inode_pass2( goto out_release; } ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); - dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); + dip = xfs_buf_offset(bp, in_f->ilf_boffset); /* * Make sure the place we're flushing out to really looks @@ -2885,7 +2884,7 @@ xlog_recover_dquot_pass2( return error; ASSERT(bp); - ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); + ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); /* * If the dquot has an LSN in it, recover the dquot only if it's less @@ -3068,12 +3067,22 @@ xlog_recover_do_icreate_pass2( return -EINVAL; } - /* existing allocation is fixed value */ - ASSERT(count == mp->m_ialloc_inos); - ASSERT(length == mp->m_ialloc_blks); - if (count != mp->m_ialloc_inos || - length != mp->m_ialloc_blks) { - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); + /* + * The inode chunk is either full or sparse and we only support + * m_ialloc_min_blks sized sparse allocations at this time. + */ + if (length != mp->m_ialloc_blks && + length != mp->m_ialloc_min_blks) { + xfs_warn(log->l_mp, + "%s: unsupported chunk length", __FUNCTION__); + return -EINVAL; + } + + /* verify inode count is consistent with extent length */ + if ((count >> mp->m_sb.sb_inopblog) != length) { + xfs_warn(log->l_mp, + "%s: inconsistent inode count and chunk length", + __FUNCTION__); return -EINVAL; } @@ -3091,8 +3100,8 @@ xlog_recover_do_icreate_pass2( XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) return 0; - xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, - be32_to_cpu(icl->icl_gen)); + xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length, + be32_to_cpu(icl->icl_gen)); return 0; } @@ -3364,17 +3373,17 @@ STATIC int xlog_recover_add_to_cont_trans( struct xlog *log, struct xlog_recover *trans, - xfs_caddr_t dp, + char *dp, int len) { xlog_recover_item_t *item; - xfs_caddr_t ptr, old_ptr; + char *ptr, *old_ptr; int old_len; if (list_empty(&trans->r_itemq)) { /* finish copying rest of trans header */ xlog_recover_add_item(&trans->r_itemq); - ptr = (xfs_caddr_t) &trans->r_theader + + ptr = (char *)&trans->r_theader + sizeof(xfs_trans_header_t) - len; memcpy(ptr, dp, len); return 0; @@ -3410,12 +3419,12 @@ STATIC int xlog_recover_add_to_trans( struct xlog *log, struct xlog_recover *trans, - xfs_caddr_t dp, + char *dp, int len) { xfs_inode_log_format_t *in_f; /* any will do */ xlog_recover_item_t *item; - xfs_caddr_t ptr; + char *ptr; if (!len) return 0; @@ -3504,7 +3513,7 @@ STATIC int xlog_recovery_process_trans( struct xlog *log, struct xlog_recover *trans, - xfs_caddr_t dp, + char *dp, unsigned int len, unsigned int flags, int pass) @@ -3611,8 +3620,8 @@ xlog_recover_process_ophdr( struct hlist_head rhash[], struct xlog_rec_header *rhead, struct xlog_op_header *ohead, - xfs_caddr_t dp, - xfs_caddr_t end, + char *dp, + char *end, int pass) { struct xlog_recover *trans; @@ -3661,11 +3670,11 @@ xlog_recover_process_data( struct xlog *log, struct hlist_head rhash[], struct xlog_rec_header *rhead, - xfs_caddr_t dp, + char *dp, int pass) { struct xlog_op_header *ohead; - xfs_caddr_t end; + char *end; int num_logops; int error; @@ -3751,11 +3760,11 @@ xlog_recover_process_efi( } set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); return error; abort_error: - xfs_trans_cancel(tp, XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } @@ -3857,13 +3866,13 @@ xlog_recover_clear_agi_bucket( xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); if (error) goto out_error; return; out_abort: - xfs_trans_cancel(tp, XFS_TRANS_ABORT); + xfs_trans_cancel(tp); out_error: xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); return; @@ -4010,7 +4019,7 @@ xlog_recover_process_iunlinks( STATIC int xlog_unpack_data_crc( struct xlog_rec_header *rhead, - xfs_caddr_t dp, + char *dp, struct xlog *log) { __le32 crc; @@ -4040,7 +4049,7 @@ xlog_unpack_data_crc( STATIC int xlog_unpack_data( struct xlog_rec_header *rhead, - xfs_caddr_t dp, + char *dp, struct xlog *log) { int i, j, k; @@ -4122,7 +4131,7 @@ xlog_do_recovery_pass( { xlog_rec_header_t *rhead; xfs_daddr_t blk_no; - xfs_caddr_t offset; + char *offset; xfs_buf_t *hbp, *dbp; int error = 0, h_size; int bblks, split_bblks; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6f23fbdfb365..461e791efad7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -725,6 +725,22 @@ xfs_mountfs( } /* + * If enabled, sparse inode chunk alignment is expected to match the + * cluster size. Full inode chunk alignment must match the chunk size, + * but that is checked on sb read verification... + */ + if (xfs_sb_version_hassparseinodes(&mp->m_sb) && + mp->m_sb.sb_spino_align != + XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { + xfs_warn(mp, + "Sparse inode block alignment (%u) must match cluster size (%llu).", + mp->m_sb.sb_spino_align, + XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); + error = -EINVAL; + goto out_remove_uuid; + } + + /* * Set inode alignment fields */ xfs_set_inoalignment(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8c995a2ccb6f..7999e91cd49a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -101,6 +101,8 @@ typedef struct xfs_mount { __uint64_t m_flags; /* global mount flags */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ + int m_ialloc_min_blks;/* min blocks in sparse inode + * allocation */ int m_inoalign_mask;/* mask sb_inoalignmt if used */ uint m_qflags; /* quota status flags */ struct xfs_trans_resv m_resv; /* precomputed res values */ @@ -179,6 +181,8 @@ typedef struct xfs_mount { allocator */ #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ +#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */ + /* * Default minimum read and write sizes. diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 981a657eca39..ab4a6066f7ca 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -306,7 +306,7 @@ xfs_fs_commit_blocks( tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); goto out_drop_iolock; } @@ -321,7 +321,7 @@ xfs_fs_commit_blocks( } xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); out_drop_iolock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 5538468c7f63..eac9549efd52 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -756,7 +756,7 @@ xfs_qm_qino_alloc( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, XFS_QM_QINOCREATE_SPACE_RES(mp), 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -764,8 +764,7 @@ xfs_qm_qino_alloc( error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); + xfs_trans_cancel(tp); return error; } } @@ -796,7 +795,7 @@ xfs_qm_qino_alloc( spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_alert(mp, "%s failed (error %d)!", __func__, error); diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 9a25c9275fb3..3640c6e896af 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -239,7 +239,7 @@ xfs_qm_scall_trunc_qfile( tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_IOLOCK_EXCL); goto out_put; } @@ -252,15 +252,14 @@ xfs_qm_scall_trunc_qfile( error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); + xfs_trans_cancel(tp); goto out_unlock; } ASSERT(ip->i_d.di_nextents == 0); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); @@ -437,7 +436,7 @@ xfs_qm_scall_setqlim( tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); goto out_rele; } @@ -548,7 +547,7 @@ xfs_qm_scall_setqlim( dqp->dq_flags |= XFS_DQ_DIRTY; xfs_trans_log_dquot(tp, dqp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); out_rele: xfs_qm_dqrele(dqp); @@ -571,7 +570,7 @@ xfs_qm_log_quotaoff_end( error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -585,8 +584,7 @@ xfs_qm_log_quotaoff_end( * We don't care about quotoff's performance. */ xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - return error; + return xfs_trans_commit(tp); } @@ -605,7 +603,7 @@ xfs_qm_log_quotaoff( tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); goto out; } @@ -624,7 +622,7 @@ xfs_qm_log_quotaoff( * We don't care about quotoff's performance. */ xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); if (error) goto out; diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 5376dd406ba2..ce6506adab7b 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -55,7 +55,6 @@ struct xfs_trans; typedef struct xfs_dqtrx { struct xfs_dquot *qt_dquot; /* the dquot this refers to */ ulong qt_blk_res; /* blks reserved on a dquot */ - ulong qt_blk_res_used; /* blks used from the reservation */ ulong qt_ino_res; /* inode reserved on a dquot */ ulong qt_ino_res_used; /* inodes used from the reservation */ long qt_bcount_delta; /* dquot blk count changes */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index f2079b6911cc..f4e8c06eee26 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -780,7 +780,6 @@ xfs_growfs_rt_alloc( * Allocate space to the file, as necessary. */ while (oblocks < nblocks) { - int cancelflags = 0; xfs_trans_t *tp; tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); @@ -792,7 +791,6 @@ xfs_growfs_rt_alloc( resblks, 0); if (error) goto error_cancel; - cancelflags = XFS_TRANS_RELEASE_LOG_RES; /* * Lock the inode. */ @@ -804,7 +802,6 @@ xfs_growfs_rt_alloc( * Allocate blocks to the bitmap file. */ nmap = 1; - cancelflags |= XFS_TRANS_ABORT; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, XFS_BMAPI_METADATA, &firstblock, resblks, &map, &nmap, &flist); @@ -818,14 +815,13 @@ xfs_growfs_rt_alloc( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) goto error_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto error; /* * Now we need to clear the allocated blocks. * Do this one block per transaction, to keep it simple. */ - cancelflags = 0; for (bno = map.br_startoff, fsbno = map.br_startblock; bno < map.br_startoff + map.br_blockcount; bno++, fsbno++) { @@ -851,7 +847,7 @@ xfs_growfs_rt_alloc( if (bp == NULL) { error = -EIO; error_cancel: - xfs_trans_cancel(tp, cancelflags); + xfs_trans_cancel(tp); goto error; } memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); @@ -859,7 +855,7 @@ error_cancel: /* * Commit the transaction. */ - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); if (error) goto error; } @@ -973,7 +969,6 @@ xfs_growfs_rt( bmbno < nrbmblocks; bmbno++) { xfs_trans_t *tp; - int cancelflags = 0; *nmp = *mp; nsbp = &nmp->m_sb; @@ -1015,7 +1010,6 @@ xfs_growfs_rt( mp->m_rbmip->i_d.di_size = nsbp->sb_rbmblocks * nsbp->sb_blocksize; xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - cancelflags |= XFS_TRANS_ABORT; /* * Get the summary inode into the transaction. */ @@ -1062,7 +1056,7 @@ xfs_growfs_rt( nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); if (error) { error_cancel: - xfs_trans_cancel(tp, cancelflags); + xfs_trans_cancel(tp); break; } /* @@ -1076,7 +1070,7 @@ error_cancel: mp->m_rsumlevels = nrsumlevels; mp->m_rsumsize = nrsumsize; - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); if (error) break; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 858e1e62bbaa..1fb16562c159 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -112,6 +112,8 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ +#define MNTOPT_DAX "dax" /* Enable direct access to bdev pages */ + /* * Table driven mount option parser. * @@ -363,6 +365,10 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DISCARD; } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { mp->m_flags &= ~XFS_MOUNT_DISCARD; +#ifdef CONFIG_FS_DAX + } else if (!strcmp(this_char, MNTOPT_DAX)) { + mp->m_flags |= XFS_MOUNT_DAX; +#endif } else { xfs_warn(mp, "unknown mount option [%s].", this_char); return -EINVAL; @@ -452,8 +458,8 @@ done: } struct proc_xfs_info { - int flag; - char *str; + uint64_t flag; + char *str; }; STATIC int @@ -474,6 +480,7 @@ xfs_showargs( { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE }, + { XFS_MOUNT_DAX, "," MNTOPT_DAX }, { 0, NULL } }; static struct proc_xfs_info xfs_info_unset[] = { @@ -1507,6 +1514,20 @@ xfs_fs_fill_super( if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) sb->s_flags |= MS_I_VERSION; + if (mp->m_flags & XFS_MOUNT_DAX) { + xfs_warn(mp, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + if (sb->s_blocksize != PAGE_SIZE) { + xfs_alert(mp, + "Filesystem block size invalid for DAX Turning DAX off."); + mp->m_flags &= ~XFS_MOUNT_DAX; + } else if (!sb->s_bdev->bd_disk->fops->direct_access) { + xfs_alert(mp, + "Block device does not support DAX Turning DAX off."); + mp->m_flags &= ~XFS_MOUNT_DAX; + } + } + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 3df411eadb86..4be27b0210af 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -104,7 +104,7 @@ xfs_readlink_bmap( cur_chunk += sizeof(struct xfs_dsymlink_hdr); } - memcpy(link + offset, bp->b_addr, byte_cnt); + memcpy(link + offset, cur_chunk, byte_cnt); pathlen -= byte_cnt; offset += byte_cnt; @@ -178,7 +178,6 @@ xfs_symlink( struct xfs_bmap_free free_list; xfs_fsblock_t first_block; bool unlock_dp_on_error = false; - uint cancel_flags; int committed; xfs_fileoff_t first_fsb; xfs_filblks_t fs_blocks; @@ -224,7 +223,6 @@ xfs_symlink( return error; tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; /* * The symlink will fit into the inode data fork? * There can't be any attributes so we get the whole variable part. @@ -239,10 +237,8 @@ xfs_symlink( resblks = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); } - if (error) { - cancel_flags = 0; + if (error) goto out_trans_cancel; - } xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); unlock_dp_on_error = true; @@ -394,7 +390,7 @@ xfs_symlink( if (error) goto out_bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto out_release_inode; @@ -407,9 +403,8 @@ xfs_symlink( out_bmap_cancel: xfs_bmap_cancel(&free_list); - cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: - xfs_trans_cancel(tp, cancel_flags); + xfs_trans_cancel(tp); out_release_inode: /* * Wait until after the current transaction is aborted to finish the @@ -464,7 +459,7 @@ xfs_inactive_symlink_rmt( tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); + xfs_trans_cancel(tp); return error; } @@ -533,7 +528,7 @@ xfs_inactive_symlink_rmt( /* * Commit the transaction containing extent freeing and EFDs. */ - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error_unlock; @@ -552,7 +547,7 @@ xfs_inactive_symlink_rmt( error_bmap_cancel: xfs_bmap_cancel(&free_list); error_trans_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); error_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 615781bf4ee5..8d916d33d93d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -738,6 +738,53 @@ TRACE_EVENT(xfs_iomap_prealloc_size, __entry->blocks, __entry->shift, __entry->writeio_blocks) ) +TRACE_EVENT(xfs_irec_merge_pre, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino, + uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask), + TP_ARGS(mp, agno, agino, holemask, nagino, nholemask), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, agino) + __field(uint16_t, holemask) + __field(xfs_agino_t, nagino) + __field(uint16_t, nholemask) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agino = agino; + __entry->holemask = holemask; + __entry->nagino = nagino; + __entry->nholemask = holemask; + ), + TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __entry->agino, __entry->holemask, __entry->nagino, + __entry->nholemask) +) + +TRACE_EVENT(xfs_irec_merge_post, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino, + uint16_t holemask), + TP_ARGS(mp, agno, agino, holemask), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, agino) + __field(uint16_t, holemask) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agino = agino; + __entry->holemask = holemask; + ), + TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->agno, __entry->agino, + __entry->holemask) +) + #define DEFINE_IREF_EVENT(name) \ DEFINE_EVENT(xfs_iref_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 220ef2c906b2..0582a27107d4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -113,7 +113,7 @@ xfs_trans_free( * blocks. Locks and log items, however, are no inherited. They must * be added to the new transaction explicitly. */ -xfs_trans_t * +STATIC xfs_trans_t * xfs_trans_dup( xfs_trans_t *tp) { @@ -251,14 +251,7 @@ xfs_trans_reserve( */ undo_log: if (resp->tr_logres > 0) { - int log_flags; - - if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) { - log_flags = XFS_LOG_REL_PERM_RESERV; - } else { - log_flags = 0; - } - xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags); + xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false); tp->t_ticket = NULL; tp->t_log_res = 0; tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; @@ -744,7 +737,7 @@ void xfs_trans_free_items( struct xfs_trans *tp, xfs_lsn_t commit_lsn, - int flags) + bool abort) { struct xfs_log_item_desc *lidp, *next; @@ -755,7 +748,7 @@ xfs_trans_free_items( if (commit_lsn != NULLCOMMITLSN) lip->li_ops->iop_committing(lip, commit_lsn); - if (flags & XFS_TRANS_ABORT) + if (abort) lip->li_flags |= XFS_LI_ABORTED; lip->li_ops->iop_unlock(lip); @@ -892,27 +885,17 @@ xfs_trans_committed_bulk( * have already been unlocked as if the commit had succeeded. * Do not reference the transaction structure after this call. */ -int -xfs_trans_commit( +static int +__xfs_trans_commit( struct xfs_trans *tp, - uint flags) + bool regrant) { struct xfs_mount *mp = tp->t_mountp; xfs_lsn_t commit_lsn = -1; int error = 0; - int log_flags = 0; int sync = tp->t_flags & XFS_TRANS_SYNC; /* - * Determine whether this commit is releasing a permanent - * log reservation or not. - */ - if (flags & XFS_TRANS_RELEASE_LOG_RES) { - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); - log_flags = XFS_LOG_REL_PERM_RESERV; - } - - /* * If there is nothing to be logged by the transaction, * then unlock all of the items associated with the * transaction and free the transaction structure. @@ -936,7 +919,7 @@ xfs_trans_commit( xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_dquot_deltas(tp); - xfs_log_commit_cil(mp, tp, &commit_lsn, flags); + xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free(tp); @@ -964,18 +947,25 @@ out_unreserve: */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { - commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); + commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant); if (commit_lsn == -1 && !error) error = -EIO; } current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); + xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); xfs_trans_free(tp); XFS_STATS_INC(xs_trans_empty); return error; } +int +xfs_trans_commit( + struct xfs_trans *tp) +{ + return __xfs_trans_commit(tp, false); +} + /* * Unlock all of the transaction's items and free the transaction. * The transaction must not have modified any of its items, because @@ -986,29 +976,22 @@ out_unreserve: */ void xfs_trans_cancel( - xfs_trans_t *tp, - int flags) + struct xfs_trans *tp) { - int log_flags; - xfs_mount_t *mp = tp->t_mountp; + struct xfs_mount *mp = tp->t_mountp; + bool dirty = (tp->t_flags & XFS_TRANS_DIRTY); /* - * See if the caller is being too lazy to figure out if - * the transaction really needs an abort. - */ - if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY)) - flags &= ~XFS_TRANS_ABORT; - /* * See if the caller is relying on us to shut down the * filesystem. This happens in paths where we detect * corruption and decide to give up. */ - if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { + if (dirty && !XFS_FORCED_SHUTDOWN(mp)) { XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } #ifdef DEBUG - if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) { + if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) { struct xfs_log_item_desc *lidp; list_for_each_entry(lidp, &tp->t_items, lid_trans) @@ -1018,27 +1001,20 @@ xfs_trans_cancel( xfs_trans_unreserve_and_mod_sb(tp); xfs_trans_unreserve_and_mod_dquots(tp); - if (tp->t_ticket) { - if (flags & XFS_TRANS_RELEASE_LOG_RES) { - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); - log_flags = XFS_LOG_REL_PERM_RESERV; - } else { - log_flags = 0; - } - xfs_log_done(mp, tp->t_ticket, NULL, log_flags); - } + if (tp->t_ticket) + xfs_log_done(mp, tp->t_ticket, NULL, false); /* mark this thread as no longer being in a transaction */ current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); - xfs_trans_free_items(tp, NULLCOMMITLSN, flags); + xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); xfs_trans_free(tp); } /* * Roll from one trans in the sequence of PERMANENT transactions to * the next: permanent transactions are only flushed out when - * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon + * committed with xfs_trans_commit(), but we still want as soon * as possible to let chunks of it go to the log. So we commit the * chunk we've been working on and get a new transaction to continue. */ @@ -1055,7 +1031,8 @@ xfs_trans_roll( * Ensure that the inode is always logged. */ trans = *tpp; - xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); + if (dp) + xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); /* * Copy the critical parameters from one trans to the next. @@ -1071,20 +1048,13 @@ xfs_trans_roll( * is in progress. The caller takes the responsibility to cancel * the duplicate transaction that gets returned. */ - error = xfs_trans_commit(trans, 0); + error = __xfs_trans_commit(trans, true); if (error) return error; trans = *tpp; /* - * transaction commit worked ok so we can drop the extra ticket - * reference that we gained in xfs_trans_dup() - */ - xfs_log_ticket_put(trans->t_ticket); - - - /* * Reserve space in the log for th next transaction. * This also pushes items in the "AIL", the list of logged items, * out to disk if they are taking up space at the tail of the log @@ -1100,6 +1070,7 @@ xfs_trans_roll( if (error) return error; - xfs_trans_ijoin(trans, dp, 0); + if (dp) + xfs_trans_ijoin(trans, dp, 0); return 0; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index b5bc1ab3c4da..3b21b4e5e467 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -133,8 +133,6 @@ typedef struct xfs_trans { * XFS transaction mechanism exported interfaces that are * actually macros. */ -#define xfs_trans_get_log_res(tp) ((tp)->t_log_res) -#define xfs_trans_get_log_count(tp) ((tp)->t_log_count) #define xfs_trans_get_block_res(tp) ((tp)->t_blk_res) #define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC) @@ -153,7 +151,6 @@ typedef struct xfs_trans { */ xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); -xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *, uint, uint); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); @@ -228,9 +225,9 @@ void xfs_trans_log_efd_extent(xfs_trans_t *, struct xfs_efd_log_item *, xfs_fsblock_t, xfs_extlen_t); -int xfs_trans_commit(xfs_trans_t *, uint flags); +int xfs_trans_commit(struct xfs_trans *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); -void xfs_trans_cancel(xfs_trans_t *, int); +void xfs_trans_cancel(xfs_trans_t *); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 573aefb5a573..1098cf490189 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -159,7 +159,7 @@ xfs_trans_ail_cursor_next( { struct xfs_log_item *lip = cur->item; - if ((__psint_t)lip & 1) + if ((uintptr_t)lip & 1) lip = xfs_ail_min(ailp); if (lip) cur->item = xfs_ail_next(ailp, lip); @@ -196,7 +196,7 @@ xfs_trans_ail_cursor_clear( list_for_each_entry(cur, &ailp->xa_cursors, list) { if (cur->item == lip) cur->item = (struct xfs_log_item *) - ((__psint_t)cur->item | 1); + ((uintptr_t)cur->item | 1); } } @@ -287,7 +287,7 @@ xfs_ail_splice( * find the place in the AIL where the items belong. */ lip = cur ? cur->item : NULL; - if (!lip || (__psint_t) lip & 1) + if (!lip || (uintptr_t)lip & 1) lip = __xfs_trans_ail_cursor_last(ailp, lsn); /* diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 76a16df55ef7..ce78534a047e 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -90,8 +90,9 @@ xfs_trans_dup_dqinfo( xfs_trans_t *ntp) { xfs_dqtrx_t *oq, *nq; - int i,j; + int i, j; xfs_dqtrx_t *oqa, *nqa; + ulong blk_res_used; if (!otp->t_dqinfo) return; @@ -102,18 +103,23 @@ xfs_trans_dup_dqinfo( * Because the quota blk reservation is carried forward, * it is also necessary to carry forward the DQ_DIRTY flag. */ - if(otp->t_flags & XFS_TRANS_DQ_DIRTY) + if (otp->t_flags & XFS_TRANS_DQ_DIRTY) ntp->t_flags |= XFS_TRANS_DQ_DIRTY; for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { oqa = otp->t_dqinfo->dqs[j]; nqa = ntp->t_dqinfo->dqs[j]; for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + blk_res_used = 0; + if (oqa[i].qt_dquot == NULL) break; oq = &oqa[i]; nq = &nqa[i]; + if (oq->qt_blk_res && oq->qt_bcount_delta > 0) + blk_res_used = oq->qt_bcount_delta; + nq->qt_dquot = oq->qt_dquot; nq->qt_bcount_delta = nq->qt_icount_delta = 0; nq->qt_rtbcount_delta = 0; @@ -121,8 +127,8 @@ xfs_trans_dup_dqinfo( /* * Transfer whatever is left of the reservations. */ - nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; - oq->qt_blk_res = oq->qt_blk_res_used; + nq->qt_blk_res = oq->qt_blk_res - blk_res_used; + oq->qt_blk_res = blk_res_used; nq->qt_rtblk_res = oq->qt_rtblk_res - oq->qt_rtblk_res_used; @@ -239,10 +245,6 @@ xfs_trans_mod_dquot( * disk blocks used. */ case XFS_TRANS_DQ_BCOUNT: - if (qtrx->qt_blk_res && delta > 0) { - qtrx->qt_blk_res_used += (ulong)delta; - ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used); - } qtrx->qt_bcount_delta += delta; break; @@ -423,15 +425,19 @@ xfs_trans_apply_dquot_deltas( * reservation that a transaction structure knows of. */ if (qtrx->qt_blk_res != 0) { - if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { - if (qtrx->qt_blk_res > - qtrx->qt_blk_res_used) + ulong blk_res_used = 0; + + if (qtrx->qt_bcount_delta > 0) + blk_res_used = qtrx->qt_bcount_delta; + + if (qtrx->qt_blk_res != blk_res_used) { + if (qtrx->qt_blk_res > blk_res_used) dqp->q_res_bcount -= (xfs_qcnt_t) (qtrx->qt_blk_res - - qtrx->qt_blk_res_used); + blk_res_used); else dqp->q_res_bcount -= (xfs_qcnt_t) - (qtrx->qt_blk_res_used - + (blk_res_used - qtrx->qt_blk_res); } } else { diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index bd1281862ad7..1b736294558a 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -30,7 +30,7 @@ void xfs_trans_init(struct xfs_mount *); void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, - int flags); + bool abort); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b43276f339ef..83061cac719b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -420,7 +420,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode) return fwnode && fwnode->type == FWNODE_ACPI; } -static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode) +static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) { return is_acpi_node(fwnode) ? container_of(fwnode, struct acpi_device, fwnode) : NULL; diff --git a/include/acpi/video.h b/include/acpi/video.h index a7d7f1043e9c..e840b294c6f5 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -43,7 +43,7 @@ static inline enum acpi_backlight_type acpi_video_get_backlight_type(void) { return acpi_backlight_vendor; } -static void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type) +static inline void acpi_video_set_dmi_backlight_type(enum acpi_backlight_type type) { } #endif diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index e6a83d712ef6..55e3abc2d027 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -55,17 +55,43 @@ #endif #ifdef CONFIG_SMP + +#ifndef smp_mb #define smp_mb() mb() +#endif + +#ifndef smp_rmb #define smp_rmb() rmb() +#endif + +#ifndef smp_wmb #define smp_wmb() wmb() +#endif + +#ifndef smp_read_barrier_depends #define smp_read_barrier_depends() read_barrier_depends() -#else +#endif + +#else /* !CONFIG_SMP */ + +#ifndef smp_mb #define smp_mb() barrier() +#endif + +#ifndef smp_rmb #define smp_rmb() barrier() +#endif + +#ifndef smp_wmb #define smp_wmb() barrier() +#endif + +#ifndef smp_read_barrier_depends #define smp_read_barrier_depends() do { } while (0) #endif +#endif /* CONFIG_SMP */ + #ifndef smp_store_mb #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1618cdfb38c7..c471dfc93b71 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -53,7 +53,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) return adev ? adev->handle : NULL; } -#define ACPI_COMPANION(dev) acpi_node((dev)->fwnode) +#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) @@ -454,7 +454,7 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode) return false; } -static inline struct acpi_device *acpi_node(struct fwnode_handle *fwnode) +static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) { return NULL; } diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 26fc8bc77f85..7f8ad9593da7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -475,6 +475,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (volatile typeof(x) *)&(x); }) #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) +/** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = READ_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) diff --git a/include/linux/fs.h b/include/linux/fs.h index e351da4a934f..3f1a84635da8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); +typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 @@ -2655,9 +2656,13 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); +int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, + dax_iodone_t); int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); -#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) +#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) +#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, diff --git a/include/linux/irq.h b/include/linux/irq.h index 812149160d3b..92188b0225bb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -407,7 +407,6 @@ enum { IRQCHIP_EOI_THREADED = (1 << 6), }; -/* This include will go away once we isolated irq_desc usage to core code */ #include <linux/irqdesc.h> /* diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index c52d1480f272..624a668e61f1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -3,9 +3,6 @@ /* * Core internal functions to deal with irq descriptors - * - * This include will move to kernel/irq once we cleaned up the tree. - * For now it's included from <linux/irq.h> */ struct irq_affinity_notify; @@ -103,6 +100,11 @@ static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) #endif } +static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) +{ + return desc->irq_data.irq; +} + static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc) { return &desc->irq_data; @@ -188,6 +190,47 @@ __irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip, desc->name = name; } +/** + * irq_set_handler_locked - Set irq handler from a locked region + * @data: Pointer to the irq_data structure which identifies the irq + * @handler: Flow control handler function for this interrupt + * + * Sets the handler in the irq descriptor associated to @data. + * + * Must be called with irq_desc locked and valid parameters. Typical + * call site is the irq_set_type() callback. + */ +static inline void irq_set_handler_locked(struct irq_data *data, + irq_flow_handler_t handler) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + desc->handle_irq = handler; +} + +/** + * irq_set_chip_handler_name_locked - Set chip, handler and name from a locked region + * @data: Pointer to the irq_data structure for which the chip is set + * @chip: Pointer to the new irq chip + * @handler: Flow control handler function for this interrupt + * @name: Name of the interrupt + * + * Replace the irq chip at the proper hierarchy level in @data and + * sets the handler and name in the associated irq descriptor. + * + * Must be called with irq_desc locked and valid parameters. + */ +static inline void +irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, + irq_flow_handler_t handler, const char *name) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + desc->handle_irq = handler; + desc->name = name; + data->chip = chip; +} + static inline int irq_balancing_disabled(unsigned int irq) { struct irq_desc *desc; diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index fdd5cc16c9c4..9669bf9d4f48 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,12 +23,6 @@ unsigned int irq_get_next_irq(unsigned int offset); ; \ else -#ifdef CONFIG_SMP -#define irq_node(irq) (irq_get_irq_data(irq)->node) -#else -#define irq_node(irq) 0 -#endif - # define for_each_active_irq(irq) \ for (irq = irq_get_next_irq(0); irq < nr_irqs; \ irq = irq_get_next_irq(irq + 1)) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0dfa4e31563d..5f0be58640ea 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -816,13 +816,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ -#define VERIFY_OCTAL_PERMISSIONS(perms) \ - (BUILD_BUG_ON_ZERO((perms) < 0) + \ - BUILD_BUG_ON_ZERO((perms) > 0777) + \ - /* User perms >= group perms >= other perms */ \ - BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ - BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ - /* Other writable? Generally considered a bad idea. */ \ - BUILD_BUG_ON_ZERO((perms) & 2) + \ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ + /* USER_WRITABLE >= GROUP_WRITABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ + /* OTHER_WRITABLE? Generally considered a bad idea. */ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) #endif diff --git a/include/linux/module.h b/include/linux/module.h index 7ffe0851d244..d67b1932cc59 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,6 +17,7 @@ #include <linux/moduleparam.h> #include <linux/jump_label.h> #include <linux/export.h> +#include <linux/rbtree_latch.h> #include <linux/percpu.h> #include <asm/module.h> @@ -210,6 +211,13 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; +struct module; + +struct mod_tree_node { + struct module *mod; + struct latch_tree_node node; +}; + struct module { enum module_state state; @@ -232,6 +240,9 @@ struct module { unsigned int num_syms; /* Kernel parameters. */ +#ifdef CONFIG_SYSFS + struct mutex param_lock; +#endif struct kernel_param *kp; unsigned int num_kp; @@ -271,8 +282,15 @@ struct module { /* Startup function. */ int (*init)(void); - /* If this is non-NULL, vfree after init() returns */ - void *module_init; + /* + * If this is non-NULL, vfree() after init() returns. + * + * Cacheline align here, such that: + * module_init, module_core, init_size, core_size, + * init_text_size, core_text_size and mtn_core::{mod,node[0]} + * are on the same cacheline. + */ + void *module_init ____cacheline_aligned; /* Here is the actual code + data, vfree'd on unload. */ void *module_core; @@ -283,6 +301,16 @@ struct module { /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; +#ifdef CONFIG_MODULES_TREE_LOOKUP + /* + * We want mtn_core::{mod,node[0]} to be in the same cacheline as the + * above entries such that a regular lookup will only touch one + * cacheline. + */ + struct mod_tree_node mtn_core; + struct mod_tree_node mtn_init; +#endif + /* Size of RO sections of the module (text+rodata) */ unsigned int init_ro_size, core_ro_size; @@ -369,7 +397,7 @@ struct module { ctor_fn_t *ctors; unsigned int num_ctors; #endif -}; +} ____cacheline_aligned; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif @@ -423,14 +451,22 @@ struct symsearch { bool unused; }; -/* Search for an exported symbol by name. */ +/* + * Search for an exported symbol by name. + * + * Must be called with module_mutex held or preemption disabled. + */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const unsigned long **crc, bool gplok, bool warn); -/* Walk the exported symbol table */ +/* + * Walk the exported symbol table + * + * Must be called with module_mutex held or preemption disabled. + */ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, struct module *owner, void *data), void *data); diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 6480dcaca275..c12f2147c350 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -67,8 +67,9 @@ enum { struct kernel_param { const char *name; + struct module *mod; const struct kernel_param_ops *ops; - u16 perm; + const u16 perm; s8 level; u8 flags; union { @@ -108,7 +109,7 @@ struct kparam_array * * @perm is 0 if the the variable is not to appear in sysfs, or 0444 * for world-readable, 0644 for root-writable, etc. Note that if it - * is writable, you may need to use kparam_block_sysfs_write() around + * is writable, you may need to use kernel_param_lock() around * accesses (esp. charp, which can be kfreed when it changes). * * The @type is simply pasted to refer to a param_ops_##type and a @@ -216,16 +217,16 @@ struct kparam_array parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ /* Default value instead of permissions? */ \ - static const char __param_str_##name[] = prefix #name; \ + static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ - level, flags, { arg } } + = { __param_str_##name, THIS_MODULE, ops, \ + VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ - static struct kernel_param_ops __param_ops_##name = \ + static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ @@ -238,58 +239,14 @@ __check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) return 0; } -/** - * kparam_block_sysfs_write - make sure a parameter isn't written via sysfs. - * @name: the name of the parameter - * - * There's no point blocking write on a paramter that isn't writable via sysfs! - */ -#define kparam_block_sysfs_write(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0222)); \ - __kernel_param_lock(); \ - } while (0) - -/** - * kparam_unblock_sysfs_write - allows sysfs to write to a parameter again. - * @name: the name of the parameter - */ -#define kparam_unblock_sysfs_write(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0222)); \ - __kernel_param_unlock(); \ - } while (0) - -/** - * kparam_block_sysfs_read - make sure a parameter isn't read via sysfs. - * @name: the name of the parameter - * - * This also blocks sysfs writes. - */ -#define kparam_block_sysfs_read(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0444)); \ - __kernel_param_lock(); \ - } while (0) - -/** - * kparam_unblock_sysfs_read - allows sysfs to read a parameter again. - * @name: the name of the parameter - */ -#define kparam_unblock_sysfs_read(name) \ - do { \ - BUG_ON(!(__param_##name.perm & 0444)); \ - __kernel_param_unlock(); \ - } while (0) - #ifdef CONFIG_SYSFS -extern void __kernel_param_lock(void); -extern void __kernel_param_unlock(void); +extern void kernel_param_lock(struct module *mod); +extern void kernel_param_unlock(struct module *mod); #else -static inline void __kernel_param_lock(void) +static inline void kernel_param_lock(struct module *mod) { } -static inline void __kernel_param_unlock(void) +static inline void kernel_param_unlock(struct module *mod) { } #endif @@ -386,64 +343,70 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } -extern struct kernel_param_ops param_ops_byte; +extern const struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); #define param_check_byte(name, p) __param_check(name, p, unsigned char) -extern struct kernel_param_ops param_ops_short; +extern const struct kernel_param_ops param_ops_short; extern int param_set_short(const char *val, const struct kernel_param *kp); extern int param_get_short(char *buffer, const struct kernel_param *kp); #define param_check_short(name, p) __param_check(name, p, short) -extern struct kernel_param_ops param_ops_ushort; +extern const struct kernel_param_ops param_ops_ushort; extern int param_set_ushort(const char *val, const struct kernel_param *kp); extern int param_get_ushort(char *buffer, const struct kernel_param *kp); #define param_check_ushort(name, p) __param_check(name, p, unsigned short) -extern struct kernel_param_ops param_ops_int; +extern const struct kernel_param_ops param_ops_int; extern int param_set_int(const char *val, const struct kernel_param *kp); extern int param_get_int(char *buffer, const struct kernel_param *kp); #define param_check_int(name, p) __param_check(name, p, int) -extern struct kernel_param_ops param_ops_uint; +extern const struct kernel_param_ops param_ops_uint; extern int param_set_uint(const char *val, const struct kernel_param *kp); extern int param_get_uint(char *buffer, const struct kernel_param *kp); #define param_check_uint(name, p) __param_check(name, p, unsigned int) -extern struct kernel_param_ops param_ops_long; +extern const struct kernel_param_ops param_ops_long; extern int param_set_long(const char *val, const struct kernel_param *kp); extern int param_get_long(char *buffer, const struct kernel_param *kp); #define param_check_long(name, p) __param_check(name, p, long) -extern struct kernel_param_ops param_ops_ulong; +extern const struct kernel_param_ops param_ops_ulong; extern int param_set_ulong(const char *val, const struct kernel_param *kp); extern int param_get_ulong(char *buffer, const struct kernel_param *kp); #define param_check_ulong(name, p) __param_check(name, p, unsigned long) -extern struct kernel_param_ops param_ops_ullong; +extern const struct kernel_param_ops param_ops_ullong; extern int param_set_ullong(const char *val, const struct kernel_param *kp); extern int param_get_ullong(char *buffer, const struct kernel_param *kp); #define param_check_ullong(name, p) __param_check(name, p, unsigned long long) -extern struct kernel_param_ops param_ops_charp; +extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! */ -extern struct kernel_param_ops param_ops_bool; +extern const struct kernel_param_ops param_ops_bool; extern int param_set_bool(const char *val, const struct kernel_param *kp); extern int param_get_bool(char *buffer, const struct kernel_param *kp); #define param_check_bool(name, p) __param_check(name, p, bool) -extern struct kernel_param_ops param_ops_invbool; +extern const struct kernel_param_ops param_ops_bool_enable_only; +extern int param_set_bool_enable_only(const char *val, + const struct kernel_param *kp); +/* getter is the same as for the regular bool */ +#define param_check_bool_enable_only param_check_bool + +extern const struct kernel_param_ops param_ops_invbool; extern int param_set_invbool(const char *val, const struct kernel_param *kp); extern int param_get_invbool(char *buffer, const struct kernel_param *kp); #define param_check_invbool(name, p) __param_check(name, p, bool) /* An int, which can only be set like a bool (though it shows as an int). */ -extern struct kernel_param_ops param_ops_bint; +extern const struct kernel_param_ops param_ops_bint; extern int param_set_bint(const char *val, const struct kernel_param *kp); #define param_get_bint param_get_int #define param_check_bint param_check_int @@ -487,9 +450,9 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) -extern struct kernel_param_ops param_array_ops; +extern const struct kernel_param_ops param_array_ops; -extern struct kernel_param_ops param_ops_string; +extern const struct kernel_param_ops param_ops_string; extern int param_set_copystring(const char *val, const struct kernel_param *); extern int param_get_string(char *buffer, const struct kernel_param *kp); diff --git a/include/linux/of.h b/include/linux/of.h index b871ff9d81d7..f05fdcea4e66 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -128,7 +128,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode) return fwnode && fwnode->type == FWNODE_OF; } -static inline struct device_node *of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) { return fwnode ? container_of(fwnode, struct device_node, fwnode) : NULL; } @@ -387,7 +387,7 @@ static inline bool is_of_node(struct fwnode_handle *fwnode) return false; } -static inline struct device_node *of_node(struct fwnode_handle *fwnode) +static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) { return NULL; } diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index fb31765e935a..830c4992088d 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/stddef.h> +#include <linux/rcupdate.h> struct rb_node { unsigned long __rb_parent_color; @@ -73,11 +74,11 @@ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) { node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; @@ -85,6 +86,15 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, *rb_link = node; } +static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + rcu_assign_pointer(*rb_link, node); +} + #define rb_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? rb_entry(____ptr, type, member) : NULL; \ diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 378c5ee75f78..14d7b831b63a 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -123,11 +123,11 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, { if (parent) { if (parent->rb_left == old) - parent->rb_left = new; + WRITE_ONCE(parent->rb_left, new); else - parent->rb_right = new; + WRITE_ONCE(parent->rb_right, new); } else - root->rb_node = new; + WRITE_ONCE(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, @@ -137,7 +137,8 @@ static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *child = node->rb_right; + struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; @@ -167,6 +168,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, tmp = parent; } else { struct rb_node *successor = child, *child2; + tmp = child->rb_left; if (!tmp) { /* @@ -180,6 +182,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, */ parent = successor; child2 = successor->rb_right; + augment->copy(node, successor); } else { /* @@ -201,19 +204,23 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, successor = tmp; tmp = tmp->rb_left; } while (tmp); - parent->rb_left = child2 = successor->rb_right; - successor->rb_right = child; + child2 = successor->rb_right; + WRITE_ONCE(parent->rb_left, child2); + WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); + augment->copy(node, successor); augment->propagate(parent, successor); } - successor->rb_left = tmp = node->rb_left; + tmp = node->rb_left; + WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); + if (child2) { successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h new file mode 100644 index 000000000000..4f3432c61d12 --- /dev/null +++ b/include/linux/rbtree_latch.h @@ -0,0 +1,212 @@ +/* + * Latched RB-trees + * + * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org> + * + * Since RB-trees have non-atomic modifications they're not immediately suited + * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for + * lockless lookups; we cannot guarantee they return a correct result. + * + * The simplest solution is a seqlock + RB-tree, this will allow lockless + * lookups; but has the constraint (inherent to the seqlock) that read sides + * cannot nest in write sides. + * + * If we need to allow unconditional lookups (say as required for NMI context + * usage) we need a more complex setup; this data structure provides this by + * employing the latch technique -- see @raw_write_seqcount_latch -- to + * implement a latched RB-tree which does allow for unconditional lookups by + * virtue of always having (at least) one stable copy of the tree. + * + * However, while we have the guarantee that there is at all times one stable + * copy, this does not guarantee an iteration will not observe modifications. + * What might have been a stable copy at the start of the iteration, need not + * remain so for the duration of the iteration. + * + * Therefore, this does require a lockless RB-tree iteration to be non-fatal; + * see the comment in lib/rbtree.c. Note however that we only require the first + * condition -- not seeing partial stores -- because the latch thing isolates + * us from loops. If we were to interrupt a modification the lookup would be + * pointed at the stable tree and complete while the modification was halted. + */ + +#ifndef RB_TREE_LATCH_H +#define RB_TREE_LATCH_H + +#include <linux/rbtree.h> +#include <linux/seqlock.h> + +struct latch_tree_node { + struct rb_node node[2]; +}; + +struct latch_tree_root { + seqcount_t seq; + struct rb_root tree[2]; +}; + +/** + * latch_tree_ops - operators to define the tree order + * @less: used for insertion; provides the (partial) order between two elements. + * @comp: used for lookups; provides the order between the search key and an element. + * + * The operators are related like: + * + * comp(a->key,b) < 0 := less(a,b) + * comp(a->key,b) > 0 := less(b,a) + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) + * + * If these operators define a partial order on the elements we make no + * guarantee on which of the elements matching the key is found. See + * latch_tree_find(). + */ +struct latch_tree_ops { + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b); + int (*comp)(void *key, struct latch_tree_node *b); +}; + +static __always_inline struct latch_tree_node * +__lt_from_rb(struct rb_node *node, int idx) +{ + return container_of(node, struct latch_tree_node, node[idx]); +} + +static __always_inline void +__lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx, + bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b)) +{ + struct rb_root *root = <r->tree[idx]; + struct rb_node **link = &root->rb_node; + struct rb_node *node = <n->node[idx]; + struct rb_node *parent = NULL; + struct latch_tree_node *ltp; + + while (*link) { + parent = *link; + ltp = __lt_from_rb(parent, idx); + + if (less(ltn, ltp)) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, root); +} + +static __always_inline void +__lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx) +{ + rb_erase(<n->node[idx], <r->tree[idx]); +} + +static __always_inline struct latch_tree_node * +__lt_find(void *key, struct latch_tree_root *ltr, int idx, + int (*comp)(void *key, struct latch_tree_node *node)) +{ + struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node); + struct latch_tree_node *ltn; + int c; + + while (node) { + ltn = __lt_from_rb(node, idx); + c = comp(key, ltn); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return ltn; + } + + return NULL; +} + +/** + * latch_tree_insert() - insert @node into the trees @root + * @node: nodes to insert + * @root: trees to insert @node into + * @ops: operators defining the node order + * + * It inserts @node into @root in an ordered fashion such that we can always + * observe one complete tree. See the comment for raw_write_seqcount_latch(). + * + * The inserts use rcu_assign_pointer() to publish the element such that the + * tree structure is stored before we can observe the new @node. + * + * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be + * serialized. + */ +static __always_inline void +latch_tree_insert(struct latch_tree_node *node, + struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + raw_write_seqcount_latch(&root->seq); + __lt_insert(node, root, 0, ops->less); + raw_write_seqcount_latch(&root->seq); + __lt_insert(node, root, 1, ops->less); +} + +/** + * latch_tree_erase() - removes @node from the trees @root + * @node: nodes to remote + * @root: trees to remove @node from + * @ops: operators defining the node order + * + * Removes @node from the trees @root in an ordered fashion such that we can + * always observe one complete tree. See the comment for + * raw_write_seqcount_latch(). + * + * It is assumed that @node will observe one RCU quiescent state before being + * reused of freed. + * + * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be + * serialized. + */ +static __always_inline void +latch_tree_erase(struct latch_tree_node *node, + struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + raw_write_seqcount_latch(&root->seq); + __lt_erase(node, root, 0); + raw_write_seqcount_latch(&root->seq); + __lt_erase(node, root, 1); +} + +/** + * latch_tree_find() - find the node matching @key in the trees @root + * @key: search key + * @root: trees to search for @key + * @ops: operators defining the node order + * + * Does a lockless lookup in the trees @root for the node matching @key. + * + * It is assumed that this is called while holding the appropriate RCU read + * side lock. + * + * If the operators define a partial order on the elements (there are multiple + * elements which have the same key value) it is undefined which of these + * elements will be found. Nor is it possible to iterate the tree to find + * further elements with the same key value. + * + * Returns: a pointer to the node matching @key or NULL. + */ +static __always_inline struct latch_tree_node * +latch_tree_find(void *key, struct latch_tree_root *root, + const struct latch_tree_ops *ops) +{ + struct latch_tree_node *node; + unsigned int seq; + + do { + seq = raw_read_seqcount_latch(&root->seq); + node = __lt_find(key, root, seq & 1, ops->comp); + } while (read_seqcount_retry(&root->seq, seq)); + + return node; +} + +#endif /* RB_TREE_LATCH_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 33a056bb886f..4cf5f51b4c9c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -633,21 +633,6 @@ static inline void rcu_preempt_sleep_check(void) #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** - * lockless_dereference() - safely load a pointer for later dereference - * @p: The pointer to load - * - * Similar to rcu_dereference(), but for situations where the pointed-to - * object's lifetime is managed by something other than RCU. That - * "something other" might be reference counting or simple immortality. - */ -#define lockless_dereference(p) \ -({ \ - typeof(p) _________p1 = READ_ONCE(p); \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ - (_________p1); \ -}) - -/** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 486e685a226a..e0582106ef4f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -35,6 +35,7 @@ #include <linux/spinlock.h> #include <linux/preempt.h> #include <linux/lockdep.h> +#include <linux/compiler.h> #include <asm/processor.h> /* @@ -274,9 +275,87 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) s->sequence++; } -/* +static inline int raw_read_seqcount_latch(seqcount_t *s) +{ + return lockless_dereference(s->sequence); +} + +/** * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t + * + * The latch technique is a multiversion concurrency control method that allows + * queries during non-atomic modifications. If you can guarantee queries never + * interrupt the modification -- e.g. the concurrency is strictly between CPUs + * -- you most likely do not need this. + * + * Where the traditional RCU/lockless data structures rely on atomic + * modifications to ensure queries observe either the old or the new state the + * latch allows the same for non-atomic updates. The trade-off is doubling the + * cost of storage; we have to maintain two copies of the entire data + * structure. + * + * Very simply put: we first modify one copy and then the other. This ensures + * there is always one copy in a stable state, ready to give us an answer. + * + * The basic form is a data structure like: + * + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; + * + * Where a modification, which is assumed to be externally serialized, does the + * following: + * + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); <- Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[0], ...); + * + * smp_wmb(); <- Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[1], ...); + * } + * + * The query will have a form like: + * + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; + * + * do { + * seq = lockless_dereference(latch->seq); + * + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); + * + * smp_rmb(); + * } while (seq != latch->seq); + * + * return entry; + * } + * + * So during the modification, queries are first redirected to data[1]. Then we + * modify data[0]. When that is complete, we redirect queries back to data[0] + * and we can modify data[1]. + * + * NOTE: The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. + * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { diff --git a/include/net/ax25.h b/include/net/ax25.h index 16a923a3a43a..e602f8177ebf 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/atomic.h> #include <net/neighbour.h> +#include <net/sock.h> #define AX25_T1CLAMPLO 1 #define AX25_T1CLAMPHI (30 * HZ) @@ -246,7 +247,20 @@ typedef struct ax25_cb { atomic_t refcount; } ax25_cb; -#define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo) +struct ax25_sock { + struct sock sk; + struct ax25_cb *cb; +}; + +static inline struct ax25_sock *ax25_sk(const struct sock *sk) +{ + return (struct ax25_sock *) sk; +} + +static inline struct ax25_cb *sk_to_ax25(const struct sock *sk) +{ + return ax25_sk(sk)->cb; +} #define ax25_for_each(__ax25, list) \ hlist_for_each_entry(__ax25, list, ax25_node) diff --git a/include/net/sock.h b/include/net/sock.h index 14d539c040d7..05a8c1aea251 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -277,7 +277,6 @@ struct cg_proto; * @sk_incoming_cpu: record cpu processing incoming packets * @sk_txhash: computed flow hash for use on transmit * @sk_filter: socket filtering instructions - * @sk_protinfo: private area, net family specific, when not using slab * @sk_timer: sock cleanup timer * @sk_stamp: time stamp of last packet received * @sk_tsflags: SO_TIMESTAMPING socket options @@ -416,7 +415,6 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; long sk_sndtimeo; - void *sk_protinfo; struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 7f79cf459591..0b73af9be12f 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, TP_ARGS(wq) ); -#define show_oper_type(type) \ - __print_symbolic(type, \ - { BTRFS_QGROUP_OPER_ADD_EXCL, "OPER_ADD_EXCL" }, \ - { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" }, \ - { BTRFS_QGROUP_OPER_SUB_EXCL, "OPER_SUB_EXCL" }, \ - { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" }) - -DECLARE_EVENT_CLASS(btrfs_qgroup_oper, - - TP_PROTO(struct btrfs_qgroup_operation *oper), - - TP_ARGS(oper), - - TP_STRUCT__entry( - __field( u64, ref_root ) - __field( u64, bytenr ) - __field( u64, num_bytes ) - __field( u64, seq ) - __field( int, type ) - __field( u64, elem_seq ) - ), - - TP_fast_assign( - __entry->ref_root = oper->ref_root; - __entry->bytenr = oper->bytenr, - __entry->num_bytes = oper->num_bytes; - __entry->seq = oper->seq; - __entry->type = oper->type; - __entry->elem_seq = oper->elem.seq; - ), - - TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, " - "seq = %llu, elem.seq = %llu, type = %s", - (unsigned long long)__entry->ref_root, - (unsigned long long)__entry->bytenr, - (unsigned long long)__entry->num_bytes, - (unsigned long long)__entry->seq, - (unsigned long long)__entry->elem_seq, - show_oper_type(__entry->type)) -); - -DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account, - - TP_PROTO(struct btrfs_qgroup_operation *oper), - - TP_ARGS(oper) -); - -DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref, - - TP_PROTO(struct btrfs_qgroup_operation *oper), - - TP_ARGS(oper) -); - #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 83d6236a2f08..eaf94919291a 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -19,8 +19,10 @@ #define _UAPI_LINUX_IN_H #include <linux/types.h> +#include <linux/libc-compat.h> #include <linux/socket.h> +#if __UAPI_DEF_IN_IPPROTO /* Standard well-defined IP protocols. */ enum { IPPROTO_IP = 0, /* Dummy protocol for TCP */ @@ -75,12 +77,14 @@ enum { #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MAX }; +#endif - +#if __UAPI_DEF_IN_ADDR /* Internet address. */ struct in_addr { __be32 s_addr; }; +#endif #define IP_TOS 1 #define IP_TTL 2 @@ -158,6 +162,7 @@ struct in_addr { /* Request struct for multicast socket ops */ +#if __UAPI_DEF_IP_MREQ struct ip_mreq { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_interface; /* local IP address of interface */ @@ -209,14 +214,18 @@ struct group_filter { #define GROUP_FILTER_SIZE(numsrc) \ (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \ + (numsrc) * sizeof(struct __kernel_sockaddr_storage)) +#endif +#if __UAPI_DEF_IN_PKTINFO struct in_pktinfo { int ipi_ifindex; struct in_addr ipi_spec_dst; struct in_addr ipi_addr; }; +#endif /* Structure describing an Internet (IP) socket address. */ +#if __UAPI_DEF_SOCKADDR_IN #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { __kernel_sa_family_t sin_family; /* Address family */ @@ -228,8 +237,9 @@ struct sockaddr_in { sizeof(unsigned short int) - sizeof(struct in_addr)]; }; #define sin_zero __pad /* for BSD UNIX comp. -FvK */ +#endif - +#if __UAPI_DEF_IN_CLASS /* * Definitions of the bits in an Internet address integer. * On subnets, host and network parts are found according @@ -280,7 +290,7 @@ struct sockaddr_in { #define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ #define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ #define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ - +#endif /* <asm/byteorder.h> contains the htonl type stuff.. */ #include <asm/byteorder.h> diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index fa673e9cc040..7d024ceb075d 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -56,6 +56,13 @@ /* GLIBC headers included first so don't define anything * that would already be defined. */ +#define __UAPI_DEF_IN_ADDR 0 +#define __UAPI_DEF_IN_IPPROTO 0 +#define __UAPI_DEF_IN_PKTINFO 0 +#define __UAPI_DEF_IP_MREQ 0 +#define __UAPI_DEF_SOCKADDR_IN 0 +#define __UAPI_DEF_IN_CLASS 0 + #define __UAPI_DEF_IN6_ADDR 0 /* The exception is the in6_addr macros which must be defined * if the glibc code didn't define them. This guard matches @@ -78,6 +85,13 @@ /* Linux headers included first, and we must define everything * we need. The expectation is that glibc will check the * __UAPI_DEF_* defines and adjust appropriately. */ +#define __UAPI_DEF_IN_ADDR 1 +#define __UAPI_DEF_IN_IPPROTO 1 +#define __UAPI_DEF_IN_PKTINFO 1 +#define __UAPI_DEF_IP_MREQ 1 +#define __UAPI_DEF_SOCKADDR_IN 1 +#define __UAPI_DEF_IN_CLASS 1 + #define __UAPI_DEF_IN6_ADDR 1 /* We unconditionally define the in6_addr macros and glibc must * coordinate. */ @@ -103,6 +117,14 @@ * that we need. */ #else /* !defined(__GLIBC__) */ +/* Definitions for in.h */ +#define __UAPI_DEF_IN_ADDR 1 +#define __UAPI_DEF_IN_IPPROTO 1 +#define __UAPI_DEF_IN_PKTINFO 1 +#define __UAPI_DEF_IP_MREQ 1 +#define __UAPI_DEF_SOCKADDR_IN 1 +#define __UAPI_DEF_IN_CLASS 1 + /* Definitions for in6.h */ #define __UAPI_DEF_IN6_ADDR 1 #define __UAPI_DEF_IN6_ADDR_ALT 1 diff --git a/init/Kconfig b/init/Kconfig index 7d1ffd2ae536..bcc41bd19999 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1941,26 +1941,21 @@ config MODULE_COMPRESS bool "Compress modules on installation" depends on MODULES help - This option compresses the kernel modules when 'make - modules_install' is run. - The modules will be compressed either using gzip or xz depend on the - choice made in "Compression algorithm". + Compresses kernel modules when 'make modules_install' is run; gzip or + xz depending on "Compression algorithm" below. - module-init-tools has support for gzip format while kmod handle gzip - and xz compressed modules. + module-init-tools MAY support gzip, and kmod MAY support gzip and xz. - When a kernel module is installed from outside of the main kernel - source and uses the Kbuild system for installing modules then that - kernel module will also be compressed when it is installed. + Out-of-tree kernel modules installed using Kbuild will also be + compressed upon installation. - This option provides little benefit when the modules are to be used inside - an initrd or initramfs, it generally is more efficient to compress the whole - initrd or initramfs instead. + Note: for modules inside an initrd or initramfs, it's more efficient + to compress the whole initrd or initramfs instead. - This is fully compatible with signed modules while the signed module is - compressed. module-init-tools or kmod handles decompression and provide to - other layer the uncompressed but signed payload. + Note: This is fully compatible with signed modules. + + If in doubt, say N. choice prompt "Compression algorithm" @@ -1982,6 +1977,10 @@ endchoice endif # MODULES +config MODULES_TREE_LOOKUP + def_bool y + depends on PERF_EVENTS || TRACING + config INIT_ALL_POSSIBLE bool help diff --git a/kernel/configs/xen.config b/kernel/configs/xen.config new file mode 100644 index 000000000000..ff756221f112 --- /dev/null +++ b/kernel/configs/xen.config @@ -0,0 +1,48 @@ +# global stuff - these enable us to allow some +# of the not so generic stuff below for xen +CONFIG_PARAVIRT=y +CONFIG_NET=y +CONFIG_NET_CORE=y +CONFIG_NETDEVICES=y +CONFIG_BLOCK=y +CONFIG_WATCHDOG=y +CONFIG_TARGET_CORE=y +CONFIG_SCSI=y +CONFIG_FB=y +CONFIG_INPUT_MISC=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_TTY=y +# Technically not required but otherwise produces +# pretty useless systems starting from allnoconfig +# You want TCP/IP and ELF binaries right? +CONFIG_INET=y +CONFIG_BINFMT_ELF=y +# generic config +CONFIG_XEN=y +CONFIG_XEN_DOM0=y +# backend drivers +CONFIG_XEN_BACKEND=y +CONFIG_XEN_BLKDEV_BACKEND=m +CONFIG_XEN_NETDEV_BACKEND=m +CONFIG_HVC_XEN=y +CONFIG_XEN_WDT=m +CONFIG_XEN_SCSI_BACKEND=m +# frontend drivers +CONFIG_XEN_FBDEV_FRONTEND=m +CONFIG_HVC_XEN_FRONTEND=y +CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m +CONFIG_XEN_SCSI_FRONTEND=m +# others +CONFIG_XEN_BALLOON=y +CONFIG_XEN_SCRUB_PAGES=y +CONFIG_XEN_DEV_EVTCHN=m +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m +CONFIG_XENFS=m +CONFIG_XEN_COMPAT_XENFS=y +CONFIG_XEN_SYS_HYPERVISOR=y +CONFIG_XEN_XENBUS_FRONTEND=y +CONFIG_XEN_GNTDEV=m +CONFIG_XEN_GRANT_DEV_ALLOC=m +CONFIG_SWIOTLB_XEN=y +CONFIG_XEN_PRIVCMD=m diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 9019f15deab2..52ebaca1b9fc 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -302,7 +302,7 @@ static int jump_label_add_module(struct module *mod) continue; key = iterk; - if (__module_address(iter->key) == mod) { + if (within_module(iter->key, mod)) { /* * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. */ @@ -339,7 +339,7 @@ static void jump_label_del_module(struct module *mod) key = (struct static_key *)(unsigned long)iter->key; - if (__module_address(iter->key) == mod) + if (within_module(iter->key, mod)) continue; prev = &key->next; @@ -443,14 +443,16 @@ static void jump_label_update(struct static_key *key, int enable) { struct jump_entry *stop = __stop___jump_table; struct jump_entry *entry = jump_label_get_entries(key); - #ifdef CONFIG_MODULES - struct module *mod = __module_address((unsigned long)key); + struct module *mod; __jump_label_mod_update(key, enable); + preempt_disable(); + mod = __module_address((unsigned long)key); if (mod) stop = mod->jump_entries + mod->num_jump_entries; + preempt_enable(); #endif /* if there are no users, entry can be NULL */ if (entry) diff --git a/kernel/module.c b/kernel/module.c index f80a97f7da1f..3e0e19763d24 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -101,48 +101,201 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); -#ifdef CONFIG_KGDB_KDB -struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ -#endif /* CONFIG_KGDB_KDB */ -#ifdef CONFIG_MODULE_SIG -#ifdef CONFIG_MODULE_SIG_FORCE -static bool sig_enforce = true; -#else -static bool sig_enforce = false; +#ifdef CONFIG_MODULES_TREE_LOOKUP + +/* + * Use a latched RB-tree for __module_address(); this allows us to use + * RCU-sched lookups of the address from any context. + * + * Because modules have two address ranges: init and core, we need two + * latch_tree_nodes entries. Therefore we need the back-pointer from + * mod_tree_node. + * + * Because init ranges are short lived we mark them unlikely and have placed + * them outside the critical cacheline in struct module. + * + * This is conditional on PERF_EVENTS || TRACING because those can really hit + * __module_address() hard by doing a lot of stack unwinding; potentially from + * NMI context. + */ -static int param_set_bool_enable_only(const char *val, - const struct kernel_param *kp) +static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) { - int err; - bool test; - struct kernel_param dummy_kp = *kp; + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; - dummy_kp.arg = &test; + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->module_init; - err = param_set_bool(val, &dummy_kp); - if (err) - return err; + return (unsigned long)mod->module_core; +} + +static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) +{ + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; - /* Don't let them unset it once it's set! */ - if (!test && sig_enforce) - return -EROFS; + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->init_size; + + return (unsigned long)mod->core_size; +} + +static __always_inline bool +mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) +{ + return __mod_tree_val(a) < __mod_tree_val(b); +} + +static __always_inline int +mod_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long start, end; + + start = __mod_tree_val(n); + if (val < start) + return -1; + + end = start + __mod_tree_size(n); + if (val >= end) + return 1; - if (test) - sig_enforce = true; return 0; } -static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_OPS_FL_NOARG, - .set = param_set_bool_enable_only, - .get = param_get_bool, +static const struct latch_tree_ops mod_tree_ops = { + .less = mod_tree_less, + .comp = mod_tree_comp, }; -#define param_check_bool_enable_only param_check_bool +static struct mod_tree_root { + struct latch_tree_root root; + unsigned long addr_min; + unsigned long addr_max; +} mod_tree __cacheline_aligned = { + .addr_min = -1UL, +}; + +#define module_addr_min mod_tree.addr_min +#define module_addr_max mod_tree.addr_max + +static noinline void __mod_tree_insert(struct mod_tree_node *node) +{ + latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops); +} + +static void __mod_tree_remove(struct mod_tree_node *node) +{ + latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops); +} + +/* + * These modifications: insert, remove_init and remove; are serialized by the + * module_mutex. + */ +static void mod_tree_insert(struct module *mod) +{ + mod->mtn_core.mod = mod; + mod->mtn_init.mod = mod; + + __mod_tree_insert(&mod->mtn_core); + if (mod->init_size) + __mod_tree_insert(&mod->mtn_init); +} + +static void mod_tree_remove_init(struct module *mod) +{ + if (mod->init_size) + __mod_tree_remove(&mod->mtn_init); +} + +static void mod_tree_remove(struct module *mod) +{ + __mod_tree_remove(&mod->mtn_core); + mod_tree_remove_init(mod); +} + +static struct module *mod_find(unsigned long addr) +{ + struct latch_tree_node *ltn; + + ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops); + if (!ltn) + return NULL; + + return container_of(ltn, struct mod_tree_node, node)->mod; +} + +#else /* MODULES_TREE_LOOKUP */ + +static unsigned long module_addr_min = -1UL, module_addr_max = 0; + +static void mod_tree_insert(struct module *mod) { } +static void mod_tree_remove_init(struct module *mod) { } +static void mod_tree_remove(struct module *mod) { } + +static struct module *mod_find(unsigned long addr) +{ + struct module *mod; + + list_for_each_entry_rcu(mod, &modules, list) { + if (within_module(addr, mod)) + return mod; + } + + return NULL; +} + +#endif /* MODULES_TREE_LOOKUP */ + +/* + * Bounds of module text, for speeding up __module_address. + * Protected by module_mutex. + */ +static void __mod_update_bounds(void *base, unsigned int size) +{ + unsigned long min = (unsigned long)base; + unsigned long max = min + size; + + if (min < module_addr_min) + module_addr_min = min; + if (max > module_addr_max) + module_addr_max = max; +} + +static void mod_update_bounds(struct module *mod) +{ + __mod_update_bounds(mod->module_core, mod->core_size); + if (mod->init_size) + __mod_update_bounds(mod->module_init, mod->init_size); +} + +#ifdef CONFIG_KGDB_KDB +struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ +#endif /* CONFIG_KGDB_KDB */ + +static void module_assert_mutex(void) +{ + lockdep_assert_held(&module_mutex); +} + +static void module_assert_mutex_or_preempt(void) +{ +#ifdef CONFIG_LOCKDEP + if (unlikely(!debug_locks)) + return; + + WARN_ON(!rcu_read_lock_sched_held() && + !lockdep_is_held(&module_mutex)); +#endif +} + +static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); +#ifndef CONFIG_MODULE_SIG_FORCE module_param(sig_enforce, bool_enable_only, 0644); #endif /* !CONFIG_MODULE_SIG_FORCE */ -#endif /* CONFIG_MODULE_SIG */ /* Block module loading/unloading? */ int modules_disabled = 0; @@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); -/* Bounds of module allocation, for speeding __module_address. - * Protected by module_mutex. */ -static unsigned long module_addr_min = -1UL, module_addr_max = 0; - int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); @@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, #endif }; + module_assert_mutex_or_preempt(); + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) return true; @@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len, { struct module *mod; + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) continue; @@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; - /* Since this should be found in kernel (which can't be removed), - * no locking is necessary. */ + /* + * Since this should be found in kernel (which can't be removed), no + * locking is necessary -- use preempt_disable() to placate lockdep. + */ + preempt_disable(); if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, - &crc, true, false)) + &crc, true, false)) { + preempt_enable(); BUG(); + } + preempt_enable(); return check_version(sechdrs, versindex, VMLINUX_SYMBOL_STR(module_layout), mod, crc, NULL); @@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod) mod_kobject_put(mod); } +static void init_param_lock(struct module *mod) +{ + mutex_init(&mod->param_lock); +} #else /* !CONFIG_SYSFS */ static int mod_sysfs_setup(struct module *mod, @@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod) { } +static void init_param_lock(struct module *mod) +{ +} #endif /* CONFIG_SYSFS */ static void mod_sysfs_teardown(struct module *mod) @@ -1852,10 +2018,11 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); - /* Wait for RCU synchronizing before releasing mod->list and buglist. */ - synchronize_rcu(); + /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ + synchronize_sched(); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ @@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size) return vmalloc_exec(size); } -static void *module_alloc_update_bounds(unsigned long size) -{ - void *ret = module_alloc(size); - - if (ret) { - mutex_lock(&module_mutex); - /* Update module bounds. */ - if ((unsigned long)ret < module_addr_min) - module_addr_min = (unsigned long)ret; - if ((unsigned long)ret + size > module_addr_max) - module_addr_max = (unsigned long)ret + size; - mutex_unlock(&module_mutex); - } - return ret; -} - #ifdef CONFIG_DEBUG_KMEMLEAK static void kmemleak_load_module(const struct module *mod, const struct load_info *info) @@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc_update_bounds(mod->core_size); + ptr = module_alloc(mod->core_size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info) mod->module_core = ptr; if (mod->init_size) { - ptr = module_alloc_update_bounds(mod->init_size); + ptr = module_alloc(mod->init_size); /* * The pointer to this block is stored in the module structure * which is inside the block. This block doesn't need to be @@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod) mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); mod->module_init = NULL; @@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod) mod->init_text_size = 0; /* * We want to free module_init, but be aware that kallsyms may be - * walking this with preempt disabled. In all the failure paths, - * we call synchronize_rcu/synchronize_sched, but we don't want - * to slow down the success path, so use actual RCU here. + * walking this with preempt disabled. In all the failure paths, we + * call synchronize_sched(), but we don't want to slow down the success + * path, so use actual RCU here. */ - call_rcu(&freeinit->rcu, do_free_init); + call_rcu_sched(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3188,7 +3340,9 @@ again: err = -EEXIST; goto out; } + mod_update_bounds(mod); list_add_rcu(&mod->list, &modules); + mod_tree_insert(mod); err = 0; out: @@ -3304,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs, if (err) goto unlink_mod; + init_param_lock(mod); + /* Now we've got everything in the final locations, we can * find optional sections. */ err = find_module_sections(mod, info); @@ -3402,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); wake_up_all(&module_wq); - /* Wait for RCU synchronizing before releasing mod->list. */ - synchronize_rcu(); + /* Wait for RCU-sched synchronizing before releasing mod->list. */ + synchronize_sched(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ @@ -3527,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr, char **modname, char *namebuf) { - struct module *mod; const char *ret = NULL; + struct module *mod; preempt_disable(); - list_for_each_entry_rcu(mod, &modules, list) { - if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) { - if (modname) - *modname = mod->name; - ret = get_ksymbol(mod, addr, size, offset); - break; - } + mod = __module_address(addr); + if (mod) { + if (modname) + *modname = mod->name; + ret = get_ksymbol(mod, addr, size, offset); } /* Make a copy in here where it's safe */ if (ret) { @@ -3547,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr, ret = namebuf; } preempt_enable(); + return ret; } @@ -3670,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned int i; int ret; + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -3844,13 +3999,15 @@ struct module *__module_address(unsigned long addr) if (addr < module_addr_min || addr > module_addr_max) return NULL; - list_for_each_entry_rcu(mod, &modules, list) { + module_assert_mutex_or_preempt(); + + mod = mod_find(addr); + if (mod) { + BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) - return mod; + mod = NULL; } - return NULL; + return mod; } EXPORT_SYMBOL_GPL(__module_address); diff --git a/kernel/params.c b/kernel/params.c index 30288c1e15dd..b6554aa71094 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -25,15 +25,34 @@ #include <linux/slab.h> #include <linux/ctype.h> -/* Protects all parameters, and incidentally kmalloced_param list. */ +#ifdef CONFIG_SYSFS +/* Protects all built-in parameters, modules use their own param_lock */ static DEFINE_MUTEX(param_lock); +/* Use the module's mutex, or if built-in use the built-in mutex */ +#ifdef CONFIG_MODULES +#define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : ¶m_lock) +#else +#define KPARAM_MUTEX(mod) (¶m_lock) +#endif + +static inline void check_kparam_locked(struct module *mod) +{ + BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod))); +} +#else +static inline void check_kparam_locked(struct module *mod) +{ +} +#endif /* !CONFIG_SYSFS */ + /* This just allows us to keep track of which parameters are kmalloced. */ struct kmalloced_param { struct list_head list; char val[]; }; static LIST_HEAD(kmalloced_params); +static DEFINE_SPINLOCK(kmalloced_params_lock); static void *kmalloc_parameter(unsigned int size) { @@ -43,7 +62,10 @@ static void *kmalloc_parameter(unsigned int size) if (!p) return NULL; + spin_lock(&kmalloced_params_lock); list_add(&p->list, &kmalloced_params); + spin_unlock(&kmalloced_params_lock); + return p->val; } @@ -52,6 +74,7 @@ static void maybe_kfree_parameter(void *param) { struct kmalloced_param *p; + spin_lock(&kmalloced_params_lock); list_for_each_entry(p, &kmalloced_params, list) { if (p->val == param) { list_del(&p->list); @@ -59,6 +82,7 @@ static void maybe_kfree_parameter(void *param) break; } } + spin_unlock(&kmalloced_params_lock); } static char dash2underscore(char c) @@ -119,10 +143,10 @@ static int parse_one(char *param, return -EINVAL; pr_debug("handling %s with %p\n", param, params[i].ops->set); - mutex_lock(¶m_lock); + kernel_param_lock(params[i].mod); param_check_unsafe(¶ms[i]); err = params[i].ops->set(val, ¶ms[i]); - mutex_unlock(¶m_lock); + kernel_param_unlock(params[i].mod); return err; } } @@ -254,7 +278,7 @@ char *parse_args(const char *doing, return scnprintf(buffer, PAGE_SIZE, format, \ *((type *)kp->arg)); \ } \ - struct kernel_param_ops param_ops_##name = { \ + const struct kernel_param_ops param_ops_##name = { \ .set = param_set_##name, \ .get = param_get_##name, \ }; \ @@ -306,7 +330,7 @@ static void param_free_charp(void *arg) maybe_kfree_parameter(*((char **)arg)); } -struct kernel_param_ops param_ops_charp = { +const struct kernel_param_ops param_ops_charp = { .set = param_set_charp, .get = param_get_charp, .free = param_free_charp, @@ -331,13 +355,44 @@ int param_get_bool(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_bool); -struct kernel_param_ops param_ops_bool = { +const struct kernel_param_ops param_ops_bool = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool, .get = param_get_bool, }; EXPORT_SYMBOL(param_ops_bool); +int param_set_bool_enable_only(const char *val, const struct kernel_param *kp) +{ + int err = 0; + bool new_value; + bool orig_value = *(bool *)kp->arg; + struct kernel_param dummy_kp = *kp; + + dummy_kp.arg = &new_value; + + err = param_set_bool(val, &dummy_kp); + if (err) + return err; + + /* Don't let them unset it once it's set! */ + if (!new_value && orig_value) + return -EROFS; + + if (new_value) + err = param_set_bool(val, kp); + + return err; +} +EXPORT_SYMBOL_GPL(param_set_bool_enable_only); + +const struct kernel_param_ops param_ops_bool_enable_only = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = param_set_bool_enable_only, + .get = param_get_bool, +}; +EXPORT_SYMBOL_GPL(param_ops_bool_enable_only); + /* This one must be bool. */ int param_set_invbool(const char *val, const struct kernel_param *kp) { @@ -359,7 +414,7 @@ int param_get_invbool(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_invbool); -struct kernel_param_ops param_ops_invbool = { +const struct kernel_param_ops param_ops_invbool = { .set = param_set_invbool, .get = param_get_invbool, }; @@ -367,12 +422,11 @@ EXPORT_SYMBOL(param_ops_invbool); int param_set_bint(const char *val, const struct kernel_param *kp) { - struct kernel_param boolkp; + /* Match bool exactly, by re-using it. */ + struct kernel_param boolkp = *kp; bool v; int ret; - /* Match bool exactly, by re-using it. */ - boolkp = *kp; boolkp.arg = &v; ret = param_set_bool(val, &boolkp); @@ -382,7 +436,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp) } EXPORT_SYMBOL(param_set_bint); -struct kernel_param_ops param_ops_bint = { +const struct kernel_param_ops param_ops_bint = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bint, .get = param_get_int, @@ -390,7 +444,8 @@ struct kernel_param_ops param_ops_bint = { EXPORT_SYMBOL(param_ops_bint); /* We break the rule and mangle the string. */ -static int param_array(const char *name, +static int param_array(struct module *mod, + const char *name, const char *val, unsigned int min, unsigned int max, void *elem, int elemsize, @@ -421,7 +476,7 @@ static int param_array(const char *name, /* nul-terminate and parse */ save = val[len]; ((char *)val)[len] = '\0'; - BUG_ON(!mutex_is_locked(¶m_lock)); + check_kparam_locked(mod); ret = set(val, &kp); if (ret != 0) @@ -443,7 +498,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp) const struct kparam_array *arr = kp->arr; unsigned int temp_num; - return param_array(kp->name, val, 1, arr->max, arr->elem, + return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem, arr->elemsize, arr->ops->set, kp->level, arr->num ?: &temp_num); } @@ -452,14 +507,13 @@ static int param_array_get(char *buffer, const struct kernel_param *kp) { int i, off, ret; const struct kparam_array *arr = kp->arr; - struct kernel_param p; + struct kernel_param p = *kp; - p = *kp; for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { if (i) buffer[off++] = ','; p.arg = arr->elem + arr->elemsize * i; - BUG_ON(!mutex_is_locked(¶m_lock)); + check_kparam_locked(p.mod); ret = arr->ops->get(buffer + off, &p); if (ret < 0) return ret; @@ -479,7 +533,7 @@ static void param_array_free(void *arg) arr->ops->free(arr->elem + arr->elemsize * i); } -struct kernel_param_ops param_array_ops = { +const struct kernel_param_ops param_array_ops = { .set = param_array_set, .get = param_array_get, .free = param_array_free, @@ -507,7 +561,7 @@ int param_get_string(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_string); -struct kernel_param_ops param_ops_string = { +const struct kernel_param_ops param_ops_string = { .set = param_set_copystring, .get = param_get_string, }; @@ -542,9 +596,9 @@ static ssize_t param_attr_show(struct module_attribute *mattr, if (!attribute->param->ops->get) return -EPERM; - mutex_lock(¶m_lock); + kernel_param_lock(mk->mod); count = attribute->param->ops->get(buf, attribute->param); - mutex_unlock(¶m_lock); + kernel_param_unlock(mk->mod); if (count > 0) { strcat(buf, "\n"); ++count; @@ -554,7 +608,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr, /* sysfs always hands a nul-terminated string in buf. We rely on that. */ static ssize_t param_attr_store(struct module_attribute *mattr, - struct module_kobject *km, + struct module_kobject *mk, const char *buf, size_t len) { int err; @@ -563,10 +617,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr, if (!attribute->param->ops->set) return -EPERM; - mutex_lock(¶m_lock); + kernel_param_lock(mk->mod); param_check_unsafe(attribute->param); err = attribute->param->ops->set(buf, attribute->param); - mutex_unlock(¶m_lock); + kernel_param_unlock(mk->mod); if (!err) return len; return err; @@ -580,17 +634,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr, #endif #ifdef CONFIG_SYSFS -void __kernel_param_lock(void) +void kernel_param_lock(struct module *mod) { - mutex_lock(¶m_lock); + mutex_lock(KPARAM_MUTEX(mod)); } -EXPORT_SYMBOL(__kernel_param_lock); -void __kernel_param_unlock(void) +void kernel_param_unlock(struct module *mod) { - mutex_unlock(¶m_lock); + mutex_unlock(KPARAM_MUTEX(mod)); } -EXPORT_SYMBOL(__kernel_param_unlock); + +EXPORT_SYMBOL(kernel_param_lock); +EXPORT_SYMBOL(kernel_param_unlock); /* * add_sysfs_param - add a parameter to sysfs @@ -856,6 +911,7 @@ static void __init version_sysfs_builtin(void) mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); + WARN_ON_ONCE(err); kobject_uevent(&mk->kobj, KOBJ_ADD); kobject_put(&mk->kobj); } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 7e01f78f0417..9e302315e33d 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -187,7 +187,7 @@ config DPM_WATCHDOG config DPM_WATCHDOG_TIMEOUT int "Watchdog timeout in seconds" range 1 120 - default 12 + default 60 depends on DPM_WATCHDOG config PM_TRACE diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2329daae5255..690f78f210f2 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -552,7 +552,7 @@ int hibernation_platform_enter(void) error = disable_nonboot_cpus(); if (error) - goto Platform_finish; + goto Enable_cpus; local_irq_disable(); syscore_suspend(); @@ -568,6 +568,8 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); local_irq_enable(); + + Enable_cpus: enable_nonboot_cpus(); Platform_finish: diff --git a/kernel/time/Makefile b/kernel/time/Makefile index ffc4cc3dcd47..49eca0beed32 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -12,5 +12,3 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o - -$(obj)/time.o: $(objtree)/include/config/ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 30b7a409bf1e..bca3667a2de1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -319,32 +319,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. * - * So we handle this differently than the other timekeeping accessor - * functions which retry when the sequence count has changed. The - * update side does: - * - * smp_wmb(); <- Ensure that the last base[1] update is visible - * tkf->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[0], tkr); - * smp_wmb(); <- Ensure that the base[0] update is visible - * tkf->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[1], tkr); - * - * The reader side does: - * - * do { - * seq = tkf->seq; - * smp_rmb(); - * idx = seq & 0x01; - * now = now(tkf->base[idx]); - * smp_rmb(); - * } while (seq != tkf->seq) - * - * As long as we update base[0] readers are forced off to - * base[1]. Once base[0] is updated readers are redirected to base[0] - * and the base[1] update takes place. + * Employ the latch technique; see @raw_write_seqcount_latch. * * So if a NMI hits the update of base[0] then it will use base[1] * which is still consistent. In the worst case this can result is a @@ -407,7 +382,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) u64 now; do { - seq = raw_read_seqcount(&tkf->seq); + seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); } while (read_seqcount_retry(&tkf->seq, seq)); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 520499dd85af..5e097fa9faf7 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1566,7 +1566,7 @@ static void migrate_timers(int cpu) BUG_ON(cpu_online(cpu)); old_base = per_cpu_ptr(&tvec_bases, cpu); - new_base = this_cpu_ptr(&tvec_bases); + new_base = get_cpu_ptr(&tvec_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. @@ -1590,6 +1590,7 @@ static void migrate_timers(int cpu) spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); + put_cpu_ptr(&tvec_bases); } static int timer_cpu_notify(struct notifier_block *self, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5243d4b03087..4c4f06176f74 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -285,12 +285,7 @@ static bool wq_disable_numa; module_param_named(disable_numa, wq_disable_numa, bool, 0444); /* see the comment above the definition of WQ_POWER_EFFICIENT */ -#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT -static bool wq_power_efficient = true; -#else -static bool wq_power_efficient; -#endif - +static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); module_param_named(power_efficient, wq_power_efficient, bool, 0444); static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ diff --git a/lib/bug.c b/lib/bug.c index 0c3bd9552b6f..cff145f032a5 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -66,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) struct module *mod; const struct bug_entry *bug = NULL; - rcu_read_lock(); + rcu_read_lock_sched(); list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { unsigned i; @@ -77,7 +77,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) } bug = NULL; out: - rcu_read_unlock(); + rcu_read_unlock_sched(); return bug; } @@ -88,6 +88,8 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, char *secstrings; unsigned int i; + lockdep_assert_held(&module_mutex); + mod->bug_table = NULL; mod->num_bugs = 0; @@ -113,6 +115,7 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, void module_bug_cleanup(struct module *mod) { + lockdep_assert_held(&module_mutex); list_del_rcu(&mod->bug_list); } diff --git a/lib/kobject.c b/lib/kobject.c index 75ee63834fd1..2e3bd01964a9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -545,6 +545,7 @@ out: kfree(devpath); return error; } +EXPORT_SYMBOL_GPL(kobject_move); /** * kobject_del - unlink kobject from hierarchy. diff --git a/lib/rbtree.c b/lib/rbtree.c index c16c81a3d430..1356454e36de 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,6 +44,30 @@ * parentheses and have some accompanying text comment. */ +/* + * Notes on lockless lookups: + * + * All stores to the tree structure (rb_left and rb_right) must be done using + * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the + * tree structure as seen in program order. + * + * These two requirements will allow lockless iteration of the tree -- not + * correct iteration mind you, tree rotations are not atomic so a lookup might + * miss entire subtrees. + * + * But they do guarantee that any such traversal will only see valid elements + * and that it will indeed complete -- does not get stuck in a loop. + * + * It also guarantees that if the lookup returns an element it is the 'correct' + * one. But not returning an element does _NOT_ mean it's not present. + * + * NOTE: + * + * Stores to __rb_parent_color are not important for simple lookups so those + * are left undone as of now. Nor did I check for loops involving parent + * pointers. + */ + static inline void rb_set_black(struct rb_node *rb) { rb->__rb_parent_color |= RB_BLACK; @@ -129,8 +153,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * This still leaves us in violation of 4), the * continuation into Case 3 will fix that. */ - parent->rb_right = tmp = node->rb_left; - node->rb_left = parent; + tmp = node->rb_left; + WRITE_ONCE(parent->rb_right, tmp); + WRITE_ONCE(node->rb_left, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -149,8 +174,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * / \ * n U */ - gparent->rb_left = tmp; /* == parent->rb_right */ - parent->rb_right = gparent; + WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */ + WRITE_ONCE(parent->rb_right, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -171,8 +196,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_left; if (node == tmp) { /* Case 2 - right rotate at parent */ - parent->rb_left = tmp = node->rb_right; - node->rb_right = parent; + tmp = node->rb_right; + WRITE_ONCE(parent->rb_left, tmp); + WRITE_ONCE(node->rb_right, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -183,8 +209,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* Case 3 - left rotate at gparent */ - gparent->rb_right = tmp; /* == parent->rb_left */ - parent->rb_left = gparent; + WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */ + WRITE_ONCE(parent->rb_left, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -224,8 +250,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * Sl Sr N Sl */ - parent->rb_right = tmp1 = sibling->rb_left; - sibling->rb_left = parent; + tmp1 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp1); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -275,9 +302,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * \ * Sr */ - sibling->rb_left = tmp1 = tmp2->rb_right; - tmp2->rb_right = sibling; - parent->rb_right = tmp2; + tmp1 = tmp2->rb_right; + WRITE_ONCE(sibling->rb_left, tmp1); + WRITE_ONCE(tmp2->rb_right, sibling); + WRITE_ONCE(parent->rb_right, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -297,8 +325,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * (sl) sr N (sl) */ - parent->rb_right = tmp2 = sibling->rb_left; - sibling->rb_left = parent; + tmp2 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp2); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -310,8 +339,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, sibling = parent->rb_left; if (rb_is_red(sibling)) { /* Case 1 - right rotate at parent */ - parent->rb_left = tmp1 = sibling->rb_right; - sibling->rb_right = parent; + tmp1 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp1); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -336,9 +366,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, break; } /* Case 3 - right rotate at sibling */ - sibling->rb_right = tmp1 = tmp2->rb_left; - tmp2->rb_left = sibling; - parent->rb_left = tmp2; + tmp1 = tmp2->rb_left; + WRITE_ONCE(sibling->rb_right, tmp1); + WRITE_ONCE(tmp2->rb_left, sibling); + WRITE_ONCE(parent->rb_left, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -347,8 +378,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, sibling = tmp2; } /* Case 4 - left rotate at parent + color flips */ - parent->rb_left = tmp2 = sibling->rb_right; - sibling->rb_right = parent; + tmp2 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp2); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); diff --git a/mm/slab_common.c b/mm/slab_common.c index 983b78694c46..3e5f8f29c286 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -855,7 +855,7 @@ void __init setup_kmalloc_cache_index_table(void) } } -static void new_kmalloc_cache(int idx, unsigned long flags) +static void __init new_kmalloc_cache(int idx, unsigned long flags) { kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, kmalloc_info[idx].size, flags); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 9c891d0412a2..ae3a47f9d1d5 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -57,7 +57,7 @@ static const struct proto_ops ax25_proto_ops; static void ax25_free_sock(struct sock *sk) { - ax25_cb_put(ax25_sk(sk)); + ax25_cb_put(sk_to_ax25(sk)); } /* @@ -306,7 +306,7 @@ void ax25_destroy_socket(ax25_cb *ax25) while ((skb = skb_dequeue(&ax25->sk->sk_receive_queue)) != NULL) { if (skb->sk != ax25->sk) { /* A pending connection */ - ax25_cb *sax25 = ax25_sk(skb->sk); + ax25_cb *sax25 = sk_to_ax25(skb->sk); /* Queue the unaccepted socket for death */ sock_orphan(skb->sk); @@ -551,7 +551,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; lock_sock(sk); - ax25 = ax25_sk(sk); + ax25 = sk_to_ax25(sk); switch (optname) { case AX25_WINDOW: @@ -697,7 +697,7 @@ static int ax25_getsockopt(struct socket *sock, int level, int optname, length = min_t(unsigned int, maxlen, sizeof(int)); lock_sock(sk); - ax25 = ax25_sk(sk); + ax25 = sk_to_ax25(sk); switch (optname) { case AX25_WINDOW: @@ -796,7 +796,7 @@ out: static struct proto ax25_proto = { .name = "AX25", .owner = THIS_MODULE, - .obj_size = sizeof(struct sock), + .obj_size = sizeof(struct ax25_sock), }; static int ax25_create(struct net *net, struct socket *sock, int protocol, @@ -858,7 +858,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, if (sk == NULL) return -ENOMEM; - ax25 = sk->sk_protinfo = ax25_create_cb(); + ax25 = ax25_sk(sk)->cb = ax25_create_cb(); if (!ax25) { sk_free(sk); return -ENOMEM; @@ -910,7 +910,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) sk->sk_state = TCP_ESTABLISHED; sock_copy_flags(sk, osk); - oax25 = ax25_sk(osk); + oax25 = sk_to_ax25(osk); ax25->modulus = oax25->modulus; ax25->backoff = oax25->backoff; @@ -938,7 +938,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) } } - sk->sk_protinfo = ax25; + ax25_sk(sk)->cb = ax25; sk->sk_destruct = ax25_free_sock; ax25->sk = sk; @@ -956,7 +956,7 @@ static int ax25_release(struct socket *sock) sock_hold(sk); sock_orphan(sk); lock_sock(sk); - ax25 = ax25_sk(sk); + ax25 = sk_to_ax25(sk); if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { @@ -1066,7 +1066,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sk); - ax25 = ax25_sk(sk); + ax25 = sk_to_ax25(sk); if (!sock_flag(sk, SOCK_ZAPPED)) { err = -EINVAL; goto out; @@ -1113,7 +1113,7 @@ static int __must_check ax25_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; - ax25_cb *ax25 = ax25_sk(sk), *ax25t; + ax25_cb *ax25 = sk_to_ax25(sk), *ax25t; struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr; ax25_digi *digi = NULL; int ct = 0, err = 0; @@ -1394,7 +1394,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr, memset(fsa, 0, sizeof(*fsa)); lock_sock(sk); - ax25 = ax25_sk(sk); + ax25 = sk_to_ax25(sk); if (peer != 0) { if (sk->sk_state != TCP_ESTABLISHED) { @@ -1446,7 +1446,7 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) return -EINVAL; lock_sock(sk); - ax25 = ax25_sk(sk); + ax25 = sk_to_ax25(sk); if (sock_flag(sk, SOCK_ZAPPED)) { err = -EADDRNOTAVAIL; @@ -1621,7 +1621,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (skb == NULL) goto out; - if (!ax25_sk(sk)->pidincl) + if (!sk_to_ax25(sk)->pidincl) skb_pull(skb, 1); /* Remove PID */ skb_reset_transport_header(skb); @@ -1762,7 +1762,7 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCAX25GETINFO: case SIOCAX25GETINFOOLD: { - ax25_cb *ax25 = ax25_sk(sk); + ax25_cb *ax25 = sk_to_ax25(sk); struct ax25_info_struct ax25_info; ax25_info.t1 = ax25->t1 / HZ; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 29a3687237aa..bb5a0e4e98d9 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -353,7 +353,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, return 0; } - ax25 = ax25_sk(make); + ax25 = sk_to_ax25(make); skb_set_owner_r(skb, make); skb_queue_head(&sk->sk_receive_queue, skb); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 476e5dda59e1..2a834c6179b9 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -129,7 +129,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_keyid *key_keyid; - u8 ip_proto; + u8 ip_proto = 0; if (!data) { data = skb->data; diff --git a/net/core/sock.c b/net/core/sock.c index 1e1fe9a68d83..08f16db46070 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1454,7 +1454,7 @@ void sk_destruct(struct sock *sk) static void __sk_free(struct sock *sk) { - if (unlikely(sock_diag_has_destroy_listeners(sk))) + if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) sock_diag_broadcast_destroy(sk); else sk_destruct(sk); @@ -2269,7 +2269,6 @@ static void sock_def_write_space(struct sock *sk) static void sock_def_destruct(struct sock *sk) { - kfree(sk->sk_protinfo); } void sk_send_sigurg(struct sock *sk) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 04ffad311704..0917123790ea 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -112,7 +112,7 @@ static int dsa_slave_open(struct net_device *dev) clear_promisc: if (dev->flags & IFF_PROMISC) - dev_set_promiscuity(master, 0); + dev_set_promiscuity(master, -1); clear_allmulti: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(master, -1); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3bfccd83551c..c7358ea4ae93 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1045,7 +1045,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { - in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); + in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtm->rtm_flags |= RTNH_F_DEAD; @@ -1074,7 +1074,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtnh->rtnh_flags = nh->nh_flags & 0xFF; if (nh->nh_flags & RTNH_F_LINKDOWN) { - in_dev = __in_dev_get_rcu(nh->nh_dev); + in_dev = __in_dev_get_rtnl(nh->nh_dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtnh->rtnh_flags |= RTNH_F_DEAD; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 36ba7c4f0283..fda33f961d83 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -103,7 +103,7 @@ ieee80211_rate_control_ops_get(const char *name) const struct rate_control_ops *ops; const char *alg_name; - kparam_block_sysfs_write(ieee80211_default_rc_algo); + kernel_param_lock(THIS_MODULE); if (!name) alg_name = ieee80211_default_rc_algo; else @@ -117,7 +117,7 @@ ieee80211_rate_control_ops_get(const char *name) /* try built-in one if specific alg requested but not found */ if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT)) ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); - kparam_unblock_sysfs_write(ieee80211_default_rc_algo); + kernel_param_unlock(THIS_MODULE); return ops; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b92d3f49c23e..9d37ccd95062 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -216,8 +216,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, - [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, - [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, diff --git a/net/sctp/output.c b/net/sctp/output.c index fc5e45b8a832..abe7c2db2412 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -599,7 +599,9 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5f6c4e61325b..1425ec2bbd5a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2121,12 +2121,6 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (sp->subscribe.sctp_data_io_event) sctp_ulpevent_read_sndrcvinfo(event, msg); -#if 0 - /* FIXME: we should be calling IP/IPv6 layers. */ - if (sk->sk_protinfo.af_inet.cmsg_flags) - ip_cmsg_recv(msg, skb); -#endif - err = copied; /* If skb's length exceeds the user's buffer, update the skb and diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 47f38be4155f..02f53674dc39 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -72,7 +72,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp) #define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); -static struct kernel_param_ops param_ops_hashtbl_sz = { +static const struct kernel_param_ops param_ops_hashtbl_sz = { .set = param_set_hashtbl_sz, .get = param_get_hashtbl_sz, }; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 66891e32c5e3..b0517287075b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2982,7 +2982,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp) RPC_MAX_RESVPORT); } -static struct kernel_param_ops param_ops_portnr = { +static const struct kernel_param_ops param_ops_portnr = { .set = param_set_portnr, .get = param_get_uint, }; @@ -3001,7 +3001,7 @@ static int param_set_slot_table_size(const char *val, RPC_MAX_SLOT_TABLE); } -static struct kernel_param_ops param_ops_slot_table_size = { +static const struct kernel_param_ops param_ops_slot_table_size = { .set = param_set_slot_table_size, .get = param_get_uint, }; @@ -3017,7 +3017,7 @@ static int param_set_max_slot_table_size(const char *val, RPC_MAX_SLOT_TABLE_LIMIT); } -static struct kernel_param_ops param_ops_max_slot_table_size = { +static const struct kernel_param_ops param_ops_max_slot_table_size = { .set = param_set_max_slot_table_size, .get = param_get_uint, }; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4906ca3c0f3a..a816382fc8af 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -108,6 +108,11 @@ void tipc_bclink_remove_node(struct net *net, u32 addr) tipc_bclink_lock(net); tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + + /* Last node? => reset backlog queue */ + if (!tn->bclink->bcast_nodes.count) + tipc_link_purge_backlog(&tn->bclink->link); + tipc_bclink_unlock(net); } diff --git a/net/tipc/link.c b/net/tipc/link.c index ca8b8e0f49b5..eaa9fe54b4ae 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -404,7 +404,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) l_ptr->reasm_buf = NULL; } -static void tipc_link_purge_backlog(struct tipc_link *l) +void tipc_link_purge_backlog(struct tipc_link *l) { __skb_queue_purge(&l->backlogq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; diff --git a/net/tipc/link.h b/net/tipc/link.h index 0c02c973e985..ae0a0ea572f2 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -218,6 +218,7 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); +void tipc_link_purge_backlog(struct tipc_link *l); void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index d9b1fef0c67e..f52abae0ec5f 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -115,6 +115,10 @@ PHONY += kvmconfig kvmconfig: kvm_guest.config @: +PHONY += xenconfig +xenconfig: xen.config + @: + PHONY += tinyconfig tinyconfig: $(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config @@ -139,7 +143,8 @@ help: @echo ' randconfig - New config with random answer to all options' @echo ' listnewconfig - List new options' @echo ' olddefconfig - Same as silentoldconfig but sets new symbols to their default value' - @echo ' kvmconfig - Enable additional options for guest kernel support' + @echo ' kvmconfig - Enable additional options for kvm guest kernel support' + @echo ' xenconfig - Enable additional options for xen dom0 and guest kernel support' @echo ' tinyconfig - Configure the tiniest possible kernel' # lxdialog stuff diff --git a/scripts/sortextable.c b/scripts/sortextable.c index 1052d4834a44..c2423d913b46 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -47,6 +47,10 @@ #define EM_MICROBLAZE 189 #endif +#ifndef EM_ARCV2 +#define EM_ARCV2 195 +#endif + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ @@ -281,6 +285,7 @@ do_file(char const *const fname) custom_sort = sort_relative_table; break; case EM_ARCOMPACT: + case EM_ARCV2: case EM_ARM: case EM_AARCH64: case EM_MICROBLAZE: diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 5696874e8062..dec607c17b64 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -654,7 +654,7 @@ static struct security_hook_list apparmor_hooks[] = { static int param_set_aabool(const char *val, const struct kernel_param *kp); static int param_get_aabool(char *buffer, const struct kernel_param *kp); #define param_check_aabool param_check_bool -static struct kernel_param_ops param_ops_aabool = { +static const struct kernel_param_ops param_ops_aabool = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aabool, .get = param_get_aabool @@ -663,7 +663,7 @@ static struct kernel_param_ops param_ops_aabool = { static int param_set_aauint(const char *val, const struct kernel_param *kp); static int param_get_aauint(char *buffer, const struct kernel_param *kp); #define param_check_aauint param_check_uint -static struct kernel_param_ops param_ops_aauint = { +static const struct kernel_param_ops param_ops_aauint = { .set = param_set_aauint, .get = param_get_aauint }; @@ -671,7 +671,7 @@ static struct kernel_param_ops param_ops_aauint = { static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp); static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); #define param_check_aalockpolicy param_check_bool -static struct kernel_param_ops param_ops_aalockpolicy = { +static const struct kernel_param_ops param_ops_aalockpolicy = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aalockpolicy, .get = param_get_aalockpolicy diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 686355fea7fd..e24121afb2f2 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -55,7 +55,7 @@ static int param_set_bufsize(const char *val, const struct kernel_param *kp) return 0; } -static struct kernel_param_ops param_ops_bufsize = { +static const struct kernel_param_ops param_ops_bufsize = { .set = param_set_bufsize, .get = param_get_uint, }; diff --git a/sound/core/ctljack.c b/sound/core/ctljack.c index 9149a4aefa95..84a3cd683068 100644 --- a/sound/core/ctljack.c +++ b/sound/core/ctljack.c @@ -41,8 +41,11 @@ static int get_available_index(struct snd_card *card, const char *name) sid.iface = SNDRV_CTL_ELEM_IFACE_CARD; strlcpy(sid.name, name, sizeof(sid.name)); - while (snd_ctl_find_id(card, &sid)) + while (snd_ctl_find_id(card, &sid)) { sid.index++; + /* reset numid; otherwise snd_ctl_find_id() hits this again */ + sid.numid = 0; + } return sid.index; } diff --git a/sound/core/init.c b/sound/core/init.c index 3e0cebacefe1..20f37fb3800e 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -109,13 +109,12 @@ static void snd_card_id_read(struct snd_info_entry *entry, static int init_info_for_card(struct snd_card *card) { - int err; struct snd_info_entry *entry; entry = snd_info_create_card_entry(card, "id", card->proc_root); if (!entry) { dev_dbg(card->dev, "unable to create card entry\n"); - return err; + return -ENOMEM; } entry->c.text.read = snd_card_id_read; card->proc_id = entry; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 7dea7987d2af..745535d1840a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -171,7 +171,7 @@ MODULE_PARM_DESC(beep_mode, "Select HDA Beep registration mode " #ifdef CONFIG_PM static int param_set_xint(const char *val, const struct kernel_param *kp); -static struct kernel_param_ops param_ops_xint = { +static const struct kernel_param_ops param_ops_xint = { .set = param_set_xint, .get = param_get_int, }; @@ -2180,6 +2180,8 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* ATI HDMI */ + { PCI_DEVICE(0x1002, 0x1308), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x793b), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x7919), @@ -2188,6 +2190,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0x970f), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, + { PCI_DEVICE(0x1002, 0x9840), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaa00), .driver_data = AZX_DRIVER_ATIHDMI | AZX_DCAPS_PRESET_ATI_HDMI }, { PCI_DEVICE(0x1002, 0xaa08), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index f8527342a150..2f2433845d04 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -591,7 +591,7 @@ static int eld_proc_new(struct hdmi_spec_per_pin *per_pin, int index) static void eld_proc_free(struct hdmi_spec_per_pin *per_pin) { - if (!per_pin->codec->bus->shutdown && per_pin->proc_entry) { + if (!per_pin->codec->bus->shutdown) { snd_info_free_entry(per_pin->proc_entry); per_pin->proc_entry = NULL; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 431a20b17df4..b3b44681d3cf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4464,6 +4464,7 @@ enum { ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_LIFEBOOK_HP_PIN, + ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4484,6 +4485,7 @@ enum { ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, ALC269_FIXUP_HEADSET_MODE, ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, + ALC269_FIXUP_ASPIRE_HEADSET_MIC, ALC269_FIXUP_ASUS_X101_FUNC, ALC269_FIXUP_ASUS_X101_VERB, ALC269_FIXUP_ASUS_X101, @@ -4511,6 +4513,7 @@ enum { ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC, ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, + ALC292_FIXUP_TPT440_DOCK2, ALC283_FIXUP_BXBT2807_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, @@ -4521,6 +4524,8 @@ enum { ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13_GPIO6, + ALC288_FIXUP_DELL_XPS_13, + ALC288_FIXUP_DISABLE_AAMIX, ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -4630,6 +4635,10 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4758,6 +4767,15 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_no_hp_mic, }, + [ALC269_FIXUP_ASPIRE_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* headset mic w/o jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4960,6 +4978,12 @@ static const struct hda_fixup alc269_fixups[] = { .chain_id = ALC269_FIXUP_HEADSET_MODE }, [ALC292_FIXUP_TPT440_DOCK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_pincfg_no_hp_to_lineout, + .chained = true, + .chain_id = ALC292_FIXUP_TPT440_DOCK2 + }, + [ALC292_FIXUP_TPT440_DOCK2] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x16, 0x21211010 }, /* dock headphone */ @@ -5046,9 +5070,23 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE }, + [ALC288_FIXUP_DISABLE_AAMIX] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC288_FIXUP_DELL_XPS_13_GPIO6 + }, + [ALC288_FIXUP_DELL_XPS_13] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_dell_xps13, + .chained = true, + .chain_id = ALC288_FIXUP_DISABLE_AAMIX + }, [ALC292_FIXUP_DISABLE_AAMIX] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE }, [ALC292_FIXUP_DELL_E7X] = { .type = HDA_FIXUP_FUNC, @@ -5073,6 +5111,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700), + SND_PCI_QUIRK(0x1025, 0x072d, "Acer Aspire V5-571G", ALC269_FIXUP_ASPIRE_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), @@ -5086,10 +5126,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), + SND_PCI_QUIRK(0x1028, 0x062e, "Dell Latitude E7450", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X), + SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -5173,6 +5214,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x159f, "Lifebook E780", ALC269_FIXUP_LIFEBOOK_NO_HP_TO_LINEOUT), SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 0521be8d46a8..da5366405eda 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -241,7 +241,9 @@ static int via_pin_power_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct hda_codec *codec = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = codec->power_save_node; + struct via_spec *spec = codec->spec; + + ucontrol->value.enumerated.item[0] = spec->gen.power_down_unused; return 0; } @@ -252,9 +254,9 @@ static int via_pin_power_ctl_put(struct snd_kcontrol *kcontrol, struct via_spec *spec = codec->spec; bool val = !!ucontrol->value.enumerated.item[0]; - if (val == codec->power_save_node) + if (val == spec->gen.power_down_unused) return 0; - codec->power_save_node = val; + /* codec->power_save_node = val; */ /* widget PM seems yet broken */ spec->gen.power_down_unused = val; analog_low_current_mode(codec); return 1; |