diff options
653 files changed, 17639 insertions, 9356 deletions
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 4a4fb295ceef..14129f149a75 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -488,9 +488,10 @@ will invoke the generic mapping error check interface. Doing so will ensure that the mapping code will work correctly on all dma implementations without any dependency on the specifics of the underlying implementation. Using the returned address without checking for errors could result in failures ranging -from panics to silent data corruption. Couple of example of incorrect ways to -check for errors that make assumptions about the underlying dma implementation -are as follows and these are applicable to dma_map_page() as well. +from panics to silent data corruption. A couple of examples of incorrect ways +to check for errors that make assumptions about the underlying dma +implementation are as follows and these are applicable to dma_map_page() as +well. Incorrect example 1: dma_addr_t dma_handle; @@ -751,7 +752,7 @@ Example 1: dma_unmap_single(dma_handle1); map_error_handling1: -Example 2: (if buffers are allocated a loop, unmap all mapped buffers when +Example 2: (if buffers are allocated in a loop, unmap all mapped buffers when mapping error is detected in the middle) dma_addr_t dma_addr; diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 16eb4c9e9233..f13c9132e9f2 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -348,34 +348,40 @@ You can change this at module load time (for a module) with: modprobe ipmi_si.o type=<type1>,<type2>.... ports=<port1>,<port2>... addrs=<addr1>,<addr2>... - irqs=<irq1>,<irq2>... trydefaults=[0|1] + irqs=<irq1>,<irq2>... regspacings=<sp1>,<sp2>,... regsizes=<size1>,<size2>,... regshifts=<shift1>,<shift2>,... slave_addrs=<addr1>,<addr2>,... force_kipmid=<enable1>,<enable2>,... kipmid_max_busy_us=<ustime1>,<ustime2>,... unload_when_empty=[0|1] + trydefaults=[0|1] trydmi=[0|1] tryacpi=[0|1] + tryplatform=[0|1] trypci=[0|1] -Each of these except si_trydefaults is a list, the first item for the +Each of these except try... items is a list, the first item for the first interface, second item for the second interface, etc. The si_type may be either "kcs", "smic", or "bt". If you leave it blank, it defaults to "kcs". -If you specify si_addrs as non-zero for an interface, the driver will +If you specify addrs as non-zero for an interface, the driver will use the memory address given as the address of the device. This overrides si_ports. -If you specify si_ports as non-zero for an interface, the driver will +If you specify ports as non-zero for an interface, the driver will use the I/O port given as the device address. -If you specify si_irqs as non-zero for an interface, the driver will +If you specify irqs as non-zero for an interface, the driver will attempt to use the given interrupt for the device. -si_trydefaults sets whether the standard IPMI interface at 0xca2 and +trydefaults sets whether the standard IPMI interface at 0xca2 and any interfaces specified by ACPE are tried. By default, the driver tries it, set this value to zero to turn this off. +The other try... items disable discovery by their corresponding +names. These are all enabled by default, set them to zero to disable +them. The tryplatform disables openfirmware. + The next three parameters have to do with register layout. The registers used by the interfaces may not appear at successive locations and they may not be in 8-bit registers. These parameters diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index d89b4fe724d7..a5eb7d19a65d 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -102,6 +102,64 @@ processing of request. Therefore, increasing the value can imporve the performace although this can cause the latency of some I/O to increase due to more number of requests. +CFQ Group scheduling +==================== + +CFQ supports blkio cgroup and has "blkio." prefixed files in each +blkio cgroup directory. It is weight-based and there are four knobs +for configuration - weight[_device] and leaf_weight[_device]. +Internal cgroup nodes (the ones with children) can also have tasks in +them, so the former two configure how much proportion the cgroup as a +whole is entitled to at its parent's level while the latter two +configure how much proportion the tasks in the cgroup have compared to +its direct children. + +Another way to think about it is assuming that each internal node has +an implicit leaf child node which hosts all the tasks whose weight is +configured by leaf_weight[_device]. Let's assume a blkio hierarchy +composed of five cgroups - root, A, B, AA and AB - with the following +weights where the names represent the hierarchy. + + weight leaf_weight + root : 125 125 + A : 500 750 + B : 250 500 + AA : 500 500 + AB : 1000 500 + +root never has a parent making its weight is meaningless. For backward +compatibility, weight is always kept in sync with leaf_weight. B, AA +and AB have no child and thus its tasks have no children cgroup to +compete with. They always get 100% of what the cgroup won at the +parent level. Considering only the weights which matter, the hierarchy +looks like the following. + + root + / | \ + A B leaf + 500 250 125 + / | \ + AA AB leaf + 500 1000 750 + +If all cgroups have active IOs and competing with each other, disk +time will be distributed like the following. + +Distribution below root. The total active weight at this level is +A:500 + B:250 + C:125 = 875. + + root-leaf : 125 / 875 =~ 14% + A : 500 / 875 =~ 57% + B(-leaf) : 250 / 875 =~ 28% + +A has children and further distributes its 57% among the children and +the implicit leaf node. The total active weight at this level is +AA:500 + AB:1000 + A-leaf:750 = 2250. + + A-leaf : ( 750 / 2250) * A =~ 19% + AA(-leaf) : ( 500 / 2250) * A =~ 12% + AB(-leaf) : (1000 / 2250) * A =~ 25% + CFQ IOPS Mode for group scheduling =================================== Basic CFQ design is to provide priority based time slices. Higher priority diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.txt index aeb93ffe6416..271e607304da 100644 --- a/Documentation/blockdev/nbd.txt +++ b/Documentation/blockdev/nbd.txt @@ -4,43 +4,13 @@ can use a remote server as one of its block devices. So every time the client computer wants to read, e.g., /dev/nb0, it sends a request over TCP to the server, which will reply with the data read. - This can be used for stations with low disk space (or even diskless - - if you boot from floppy) to borrow disk space from another computer. - Unlike NFS, it is possible to put any filesystem on it, etc. It should - even be possible to use NBD as a root filesystem (I've never tried), - but it requires a user-level program to be in the initrd to start. - It also allows you to run block-device in user land (making server - and client physically the same computer, communicating using loopback). - - Current state: It currently works. Network block device is stable. - I originally thought that it was impossible to swap over TCP. It - turned out not to be true - swapping over TCP now works and seems - to be deadlock-free, but it requires heavy patches into Linux's - network layer. - + This can be used for stations with low disk space (or even diskless) + to borrow disk space from another computer. + Unlike NFS, it is possible to put any filesystem on it, etc. + For more information, or to download the nbd-client and nbd-server tools, go to http://nbd.sf.net/. - Howto: To setup nbd, you can simply do the following: - - First, serve a device or file from a remote server: - - nbd-server <port-number> <device-or-file-to-serve-to-client> - - e.g., - root@server1 # nbd-server 1234 /dev/sdb1 - - (serves sdb1 partition on TCP port 1234) - - Then, on the local (client) system: - - nbd-client <server-name-or-IP> <server-port-number> /dev/nb[0-n] - - e.g., - root@client1 # nbd-client server1 1234 /dev/nb0 - - (creates the nb0 device on client1) - The nbd kernel module need only be installed on the client system, as the nbd-server is completely in userspace. In fact, the nbd-server has been successfully ported to other operating diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index b4b1fb3a83f0..da272c8f44e7 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -75,7 +75,7 @@ Throttling/Upper Limit policy mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - Specify a bandwidth rate on particular device for root group. The format - for policy is "<major>:<minor> <byes_per_second>". + for policy is "<major>:<minor> <bytes_per_second>". echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device @@ -94,13 +94,11 @@ Throttling/Upper Limit policy Hierarchical Cgroups ==================== -- Currently none of the IO control policy supports hierarchical groups. But - cgroup interface does allow creation of hierarchical cgroups and internally - IO policies treat them as flat hierarchy. +- Currently only CFQ supports hierarchical groups. For throttling, + cgroup interface does allow creation of hierarchical cgroups and + internally it treats them as flat hierarchy. - So this patch will allow creation of cgroup hierarchcy but at the backend - everything will be treated as flat. So if somebody created a hierarchy like - as follows. + If somebody created a hierarchy like as follows. root / \ @@ -108,16 +106,20 @@ Hierarchical Cgroups | test3 - CFQ and throttling will practically treat all groups at same level. + CFQ will handle the hierarchy correctly but and throttling will + practically treat all groups at same level. For details on CFQ + hierarchy support, refer to Documentation/block/cfq-iosched.txt. + Throttling will treat the hierarchy as if it looks like the + following. pivot / / \ \ root test1 test2 test3 - Down the line we can implement hierarchical accounting/control support - and also introduce a new cgroup file "use_hierarchy" which will control - whether cgroup hierarchy is viewed as flat or hierarchical by the policy.. - This is how memory controller also has implemented the things. + Nesting cgroups, while allowed, isn't officially supported and blkio + genereates warning when cgroups nest. Once throttling implements + hierarchy support, hierarchy will be supported and the warning will + be removed. Various user visible config options =================================== @@ -172,6 +174,12 @@ Proportional weight policy files dev weight 8:16 300 +- blkio.leaf_weight[_device] + - Equivalents of blkio.weight[_device] for the purpose of + deciding how much weight tasks in the given cgroup has while + competing with the cgroup's child cgroups. For details, + please refer to Documentation/block/cfq-iosched.txt. + - blkio.time - disk time allocated to cgroup per device in milliseconds. First two fields specify the major and minor number of the device and @@ -279,6 +287,11 @@ Proportional weight policy files and minor number of the device and third field specifies the number of times a group was dequeued from a particular device. +- blkio.*_recursive + - Recursive version of various stats. These files show the + same information as their non-recursive counterparts but + include stats from all the descendant cgroups. + Throttling/Upper limit policy files ----------------------------------- - blkio.throttle.read_bps_device diff --git a/Documentation/coccinelle.txt b/Documentation/coccinelle.txt index cf44eb6499b4..dffa2d620d6d 100644 --- a/Documentation/coccinelle.txt +++ b/Documentation/coccinelle.txt @@ -87,6 +87,10 @@ As any static code analyzer, Coccinelle produces false positives. Thus, reports must be carefully checked, and patches reviewed. +To enable verbose messages set the V= variable, for example: + + make coccicheck MODE=report V=1 + Using Coccinelle with a single semantic patch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index a686f9cd69c1..c858f8419eba 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt @@ -388,26 +388,3 @@ config FOO depends on BAR && m limits FOO to module (=m) or disabled (=n). - -Kconfig symbol existence -~~~~~~~~~~~~~~~~~~~~~~~~ -The following two methods produce the same kconfig symbol dependencies -but differ greatly in kconfig symbol existence (production) in the -generated config file. - -case 1: - -config FOO - tristate "about foo" - depends on BAR - -vs. case 2: - -if BAR -config FOO - tristate "about foo" -endif - -In case 1, the symbol FOO will always exist in the config file (given -no other dependencies). In case 2, the symbol FOO will only exist in -the config file if BAR is enabled. diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt index a09f1a6a830c..b8b77bbc784f 100644 --- a/Documentation/kbuild/kconfig.txt +++ b/Documentation/kbuild/kconfig.txt @@ -46,6 +46,12 @@ KCONFIG_OVERWRITECONFIG If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not break symlinks when .config is a symlink to somewhere else. +CONFIG_ +-------------------------------------------------- +If you set CONFIG_ in the environment, Kconfig will prefix all symbols +with its value when saving the configuration, instead of using the default, +"CONFIG_". + ______________________________________________________________________ Environment variables for '{allyes/allmod/allno/rand}config' diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1da946548772..e567af39ee34 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -564,6 +564,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. UART at the specified I/O port or MMIO address, switching to the matching ttyS device later. The options are the same as for ttyS, above. + hvc<n> Use the hypervisor console device <n>. This is for + both Xen and PowerPC hypervisors. If the device connected to the port is not a TTY but a braille device, prepend "brl," before the device type, for instance @@ -757,6 +759,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. earlyprintk= [X86,SH,BLACKFIN] earlyprintk=vga + earlyprintk=xen earlyprintk=serial[,ttySn[,baudrate]] earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] @@ -774,6 +777,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The VGA output is eventually overwritten by the real console. + The xen output can only be used by Xen PV guests. + ekgdboc= [X86,KGDB] Allow early kernel console debugging ekgdboc=kbd diff --git a/MAINTAINERS b/MAINTAINERS index 1e5c3a4ece7b..c7b9b2bf8a0a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -97,12 +97,13 @@ Descriptions of section entries: X: net/ipv6/ matches all files in and below net excluding net/ipv6/ K: Keyword perl extended regex pattern to match content in a - patch or file. For instance: + patch or file, or an affected filename. For instance: K: of_get_profile - matches patches or files that contain "of_get_profile" + matches patch or file content, or filenames, that contain + "of_get_profile" K: \b(printk|pr_(info|err))\b - matches patches or files that contain one or more of the words - printk, pr_info or pr_err + matches patch or file content, or filenames, that contain one or + more of the words printk, pr_info or pr_err One regex pattern per line. Multiple K: lines acceptable. Note: For the hard of thinking, this list is meant to remain in alphabetical @@ -1799,7 +1800,8 @@ F: drivers/bcma/ F: include/linux/bcma/ BROCADE BFA FC SCSI DRIVER -M: Krishna C Gudipati <kgudipat@brocade.com> +M: Anil Gurumurthy <agurumur@brocade.com> +M: Vijaya Mohan Guvva <vmohan@brocade.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/bfa/ @@ -2073,8 +2075,8 @@ S: Maintained F: include/linux/clk.h CISCO FCOE HBA DRIVER -M: Abhijeet Joglekar <abjoglek@cisco.com> -M: Venkata Siva Vijayendra Bhamidipati <vbhamidi@cisco.com> +M: Hiral Patel <hiralpat@cisco.com> +M: Suma Ramars <sramars@cisco.com> M: Brian Uchino <buchino@cisco.com> L: linux-scsi@vger.kernel.org S: Supported @@ -5437,6 +5439,7 @@ F: net/netrom/ NETWORK BLOCK DEVICE (NBD) M: Paul Clements <Paul.Clements@steeleye.com> S: Maintained +L: nbd-general@lists.sourceforge.net F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c F: include/linux/nbd.h @@ -6512,6 +6515,12 @@ S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c +RAMSAM DRIVER (IBM RamSan 70/80 PCI SSD Flash Card) +M: Joshua Morris <josh.h.morris@us.ibm.com> +M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> +S: Maintained +F: drivers/block/rsxx/ + RANDOM NUMBER DRIVER M: Theodore Ts'o" <tytso@mit.edu> S: Maintained @@ -7539,6 +7548,7 @@ STAGING - NVIDIA COMPLIANT EMBEDDED CONTROLLER INTERFACE (nvec) M: Julian Andres Klode <jak@jak-linux.org> M: Marc Dietrich <marvin24@gmx.de> L: ac100@lists.launchpad.net (moderated for non-subscribers) +L: linux-tegra@vger.kernel.org S: Maintained F: drivers/staging/nvec/ @@ -7831,9 +7841,7 @@ L: linux-tegra@vger.kernel.org Q: http://patchwork.ozlabs.org/project/linux-tegra/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git S: Supported -F: arch/arm/mach-tegra -F: arch/arm/boot/dts/tegra* -F: arch/arm/configs/tegra_defconfig +K: (?i)[^a-z]tegra TEHUTI ETHERNET DRIVER M: Andy Gospodarek <andy@greyhouse.net> @@ -192,7 +192,6 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ # "make" in the configured kernel build directory always uses that. # Default value for CROSS_COMPILE is not to prefix executables # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile -export KBUILD_BUILDHOST := $(SUBARCH) ARCH ?= $(SUBARCH) CROSS_COMPILE ?= $(CONFIG_CROSS_COMPILE:"%"=%) @@ -620,7 +619,8 @@ KBUILD_AFLAGS += -gdwarf-2 endif ifdef CONFIG_DEBUG_INFO_REDUCED -KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) +KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) \ + $(call cc-option,-fno-var-tracking) endif ifdef CONFIG_FUNCTION_TRACER @@ -1398,7 +1398,7 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) # Run depmod only if we have System.map and depmod is executable quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \ - $(KERNELRELEASE) + $(KERNELRELEASE) "$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))" # Create temporary dir for module support files # clean it up only when building all modules diff --git a/arch/Kconfig b/arch/Kconfig index 40e2b12c7916..dcd91a85536a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -303,6 +303,13 @@ config ARCH_WANT_OLD_COMPAT_IPC select ARCH_WANT_COMPAT_IPC_PARSE_VERSION bool +config HAVE_VIRT_TO_BUS + bool + help + An architecture should select this if it implements the + deprecated interface virt_to_bus(). All new architectures + should probably not select this. + config HAVE_ARCH_SECCOMP_FILTER bool help diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 1ecbf7a1b677..5833aa441481 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -9,6 +9,7 @@ config ALPHA select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6ec8eb3149ea..0e16cca1d011 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -49,6 +49,7 @@ config ARM select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 + select HAVE_VIRT_TO_BUS select KTIME_SCALAR select PERF_USE_VMALLOC select RTC_LIB diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 4dd41fc9e235..170e9f34003f 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -395,7 +395,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; @@ -415,7 +415,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -442,7 +442,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 2ae6591b3a55..9b89257b2cfd 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -7,6 +7,7 @@ config AVR32 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_ATOMIC64 select HARDIRQS_SW_RESEND diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index e98f3248c8aa..600494c70e96 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -33,6 +33,7 @@ config BLACKFIN select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_UID16 + select HAVE_VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 0e5c187ac7d2..bb0ac66cf533 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -43,6 +43,7 @@ config CRIS select GENERIC_ATOMIC64 select HAVE_GENERIC_HARDIRQS select HAVE_UID16 + select HAVE_VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_IRQ_SHOW select GENERIC_IOMAP diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 2d0509d4cfee..12369b194c7b 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -6,6 +6,7 @@ config FRV select HAVE_PERF_EVENTS select HAVE_UID16 select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_SHOW select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index 385fd30b142f..836f14707a62 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -60,7 +60,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi unsigned long pgoff, unsigned long flags) { struct vm_area_struct *vma; - unsigned long limit; + struct vm_unmapped_area_info info; if (len > TASK_SIZE) return -ENOMEM; @@ -79,39 +79,24 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi } /* search between the bottom of user VM and the stack grow area */ - addr = PAGE_SIZE; - limit = (current->mm->start_stack - 0x00200000); - if (addr + len <= limit) { - limit -= len; - - if (addr <= limit) { - vma = find_vma(current->mm, PAGE_SIZE); - for (; vma; vma = vma->vm_next) { - if (addr > limit) - break; - if (addr + len <= vma->vm_start) - goto success; - addr = vma->vm_end; - } - } - } + info.flags = 0; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = (current->mm->start_stack - 0x00200000); + info.align_mask = 0; + info.align_offset = 0; + addr = vm_unmapped_area(&info); + if (!(addr & ~PAGE_MASK)) + goto success; + VM_BUG_ON(addr != -ENOMEM); /* search from just above the WorkRAM area to the top of memory */ - addr = PAGE_ALIGN(0x80000000); - limit = TASK_SIZE - len; - if (addr <= limit) { - vma = find_vma(current->mm, addr); - for (; vma; vma = vma->vm_next) { - if (addr > limit) - break; - if (addr + len <= vma->vm_start) - goto success; - addr = vma->vm_end; - } - - if (!vma && addr <= limit) - goto success; - } + info.low_limit = PAGE_ALIGN(0x80000000); + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + if (!(addr & ~PAGE_MASK)) + goto success; + VM_BUG_ON(addr != -ENOMEM); #if 0 printk("[area] l=%lx (ENOMEM) f='%s'\n", diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 05b613af223a..ae8551eb3736 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -5,6 +5,7 @@ config H8300 select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 select HAVE_UID16 + select HAVE_VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index c90366ef7183..33f3fdc0b214 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -26,6 +26,7 @@ config IA64 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_VIRT_TO_BUS select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 7026b29e277a..f8280a766a78 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -423,7 +423,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = ((struct fnptr *)kretprobe_trampoline)->ip; @@ -444,7 +444,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -461,7 +461,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->cr_iip = orig_ret_address; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -487,7 +487,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index f807721e19a5..92623818a1fe 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -10,6 +10,7 @@ config M32R select ARCH_WANT_IPC_PARSE_VERSION select HAVE_DEBUG_BUGVERBOSE select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index efb1ce1f14a3..0e708c78e01c 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -8,6 +8,7 @@ config M68K select GENERIC_IRQ_SHOW select GENERIC_ATOMIC64 select HAVE_UID16 + select HAVE_VIRT_TO_BUS select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS select GENERIC_CPU_DEVICES select GENERIC_STRNCPY_FROM_USER if MMU diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index ba3b7c8c04b8..7843d11156e6 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -19,6 +19,7 @@ config MICROBLAZE select HAVE_DEBUG_KMEMLEAK select IRQ_DOMAIN select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 198641589bb5..1eabe5753215 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -38,6 +38,7 @@ config MIPS select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE select HAVE_MOD_ARCH_SPECIFIC + select HAVE_VIRT_TO_BUS select MODULES_USE_ELF_REL if MODULES select MODULES_USE_ELF_RELA if MODULES && 64BIT select CLONE_BACKWARDS diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 158467da9bc1..ce3f0807ad1e 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -598,7 +598,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)kretprobe_trampoline; @@ -618,7 +618,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -645,7 +645,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index ad0caea0bfea..b06c7360b1c6 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,6 +8,7 @@ config MN10300 select HAVE_ARCH_KGDB select GENERIC_ATOMIC64 select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER + select HAVE_VIRT_TO_BUS select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 35a4e5f6e71c..014a6482ed4c 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -12,6 +12,7 @@ config OPENRISC select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7f9b3c53f74a..4d5ea7648574 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -19,6 +19,7 @@ config PARISC select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select HAVE_MOD_ARCH_SPECIFIC + select HAVE_VIRT_TO_BUS select MODULES_USE_ELF_RELA select CLONE_BACKWARDS select TTY # Needed for pdc_cons.c diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 54d619d4cac6..5dfd248e3f1a 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -35,22 +35,17 @@ static unsigned long get_unshared_area(unsigned long addr, unsigned long len) { - struct vm_area_struct *vma; + struct vm_unmapped_area_info info; - addr = PAGE_ALIGN(addr); - - for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) - return addr; - addr = vma->vm_end; - } + info.flags = 0; + info.length = len; + info.low_limit = PAGE_ALIGN(addr); + info.high_limit = TASK_SIZE; + info.align_mask = 0; + info.align_offset = 0; + return vm_unmapped_area(&info); } -#define DCACHE_ALIGN(addr) (((addr) + (SHMLBA - 1)) &~ (SHMLBA - 1)) - /* * We need to know the offset to use. Old scheme was to look for * existing mapping and use the same offset. New scheme is to use the @@ -63,30 +58,21 @@ static unsigned long get_unshared_area(unsigned long addr, unsigned long len) */ static int get_offset(struct address_space *mapping) { - int offset = (unsigned long) mapping << (PAGE_SHIFT - 8); - return offset & 0x3FF000; + return (unsigned long) mapping >> 8; } static unsigned long get_shared_area(struct address_space *mapping, unsigned long addr, unsigned long len, unsigned long pgoff) { - struct vm_area_struct *vma; - int offset = mapping ? get_offset(mapping) : 0; + struct vm_unmapped_area_info info; - offset = (offset + (pgoff << PAGE_SHIFT)) & 0x3FF000; - - addr = DCACHE_ALIGN(addr - offset) + offset; - - for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) - return addr; - addr = DCACHE_ALIGN(vma->vm_end - offset) + offset; - if (addr < vma->vm_end) /* handle wraparound */ - return -ENOMEM; - } + info.flags = 0; + info.length = len; + info.low_limit = PAGE_ALIGN(addr); + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & (SHMLBA - 1); + info.align_offset = (get_offset(mapping) + pgoff) << PAGE_SHIFT; + return vm_unmapped_area(&info); } unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5c7470689a10..b89d7eb730a2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -87,9 +87,6 @@ config GENERIC_GPIO help Generic GPIO API support -config ARCH_NO_VIRT_TO_BUS - def_bool PPC64 - config PPC bool default y @@ -101,6 +98,7 @@ config PPC select HAVE_FUNCTION_GRAPH_TRACER select SYSCTL_EXCEPTION_TRACE select ARCH_WANT_OPTIONAL_GPIOLIB + select HAVE_VIRT_TO_BUS if !PPC64 select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e88c64331819..11f5b03a0b06 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -310,7 +310,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; @@ -330,7 +330,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -357,7 +357,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 2c86b0d63714..da8b13c4b776 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -124,7 +124,6 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hpte_cache *pte; - struct hlist_node *node; int i; rcu_read_lock(); @@ -132,7 +131,7 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; - hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, list, list_vpte_long) invalidate_pte(vcpu, pte); } @@ -143,7 +142,6 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ @@ -152,7 +150,7 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_pte) + hlist_for_each_entry_rcu(pte, list, list_pte) if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) invalidate_pte(vcpu, pte); @@ -163,7 +161,6 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ @@ -173,7 +170,7 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_pte_long) + hlist_for_each_entry_rcu(pte, list, list_pte_long) if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea) invalidate_pte(vcpu, pte); @@ -207,7 +204,6 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xfffffffffULL; @@ -216,7 +212,7 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_vpte) + hlist_for_each_entry_rcu(pte, list, list_vpte) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); @@ -228,7 +224,6 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); struct hlist_head *list; - struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xffffff000ULL; @@ -238,7 +233,7 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) rcu_read_lock(); /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, list, list_vpte_long) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); @@ -266,7 +261,6 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); - struct hlist_node *node; struct hpte_cache *pte; int i; @@ -277,7 +271,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i]; - hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, list, list_vpte_long) if ((pte->pte.raddr >= pa_start) && (pte->pte.raddr < pa_end)) invalidate_pte(vcpu, pte); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f09ae7b0b4c5..4b505370a1d5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -134,6 +134,7 @@ config S390 select HAVE_SYSCALL_WRAPPERS select HAVE_UID16 if 32BIT select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_VIRT_TO_BUS select INIT_ALL_POSSIBLE select KTIME_SCALAR if 32BIT select MODULES_USE_ELF_RELA diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d1c7214e157c..3388b2b2a07d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -354,7 +354,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address; unsigned long trampoline_address; kprobe_opcode_t *correct_ret_addr; @@ -379,7 +379,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, orig_ret_address = 0; correct_ret_addr = NULL; trampoline_address = (unsigned long) &kretprobe_trampoline; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -398,7 +398,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_assert(ri, orig_ret_address, trampoline_address); correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -427,7 +427,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c index 90fd3482b9e2..0297931335e1 100644 --- a/arch/s390/pci/pci_msi.c +++ b/arch/s390/pci/pci_msi.c @@ -25,10 +25,9 @@ static DEFINE_SPINLOCK(msi_map_lock); struct msi_desc *__irq_get_msi_desc(unsigned int irq) { - struct hlist_node *entry; struct msi_map *map; - hlist_for_each_entry_rcu(map, entry, + hlist_for_each_entry_rcu(map, &msi_hash[msi_hashfn(irq)], msi_chain) if (map->irq == irq) return map->msi; diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 3b1482e7afac..e569aa1fd2ba 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -12,6 +12,7 @@ config SCORE select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC + select HAVE_VIRT_TO_BUS select MODULES_USE_ELF_REL select CLONE_BACKWARDS diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index ef6717a64bc7..5e859633ce69 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -148,9 +148,6 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 def_bool n -config ARCH_NO_VIRT_TO_BUS - def_bool y - config ARCH_HAS_DEFAULT_IDLE def_bool y diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 1208b09e95c3..42b46e61a2d5 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -310,7 +310,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; @@ -330,7 +330,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -360,7 +360,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 58fb1e3f631d..289127d5241c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -146,9 +146,6 @@ config GENERIC_GPIO help Generic GPIO API support -config ARCH_NO_VIRT_TO_BUS - def_bool y - config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y if SPARC64 diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index a39d1ba5a119..e72212148d2a 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -511,7 +511,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; @@ -531,7 +531,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -559,7 +559,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 9fcc6b4e93b3..54df554b82d9 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -953,9 +953,8 @@ static HLIST_HEAD(ldc_channel_list); static int __ldc_channel_exists(unsigned long id) { struct ldc_channel *lp; - struct hlist_node *n; - hlist_for_each_entry(lp, n, &ldc_channel_list, list) { + hlist_for_each_entry(lp, &ldc_channel_list, list) { if (lp->id == id) return 1; } diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 4ce6e4c390e0..ff496ab1e794 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -17,6 +17,7 @@ config TILE select GENERIC_IRQ_SHOW select HAVE_DEBUG_BUGVERBOSE select HAVE_SYSCALL_WRAPPERS if TILEGX + select HAVE_VIRT_TO_BUS select SYS_HYPERVISOR select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_CLOCKEVENTS diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 60651df5f952..dc50b157fc83 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -9,6 +9,7 @@ config UNICORE32 select GENERIC_ATOMIC64 select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA + select HAVE_VIRT_TO_BUS select ARCH_HAVE_CUSTOM_GPIO_H select GENERIC_FIND_FIRST_BIT select GENERIC_IRQ_PROBE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a9383370311..a4f24f5b1218 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -112,6 +112,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_VIRT_TO_BUS select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f8fa41190c35..c205035a6b96 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -19,23 +19,28 @@ static efi_system_table_t *sys_table; +static void efi_char16_printk(efi_char16_t *str) +{ + struct efi_simple_text_output_protocol *out; + + out = (struct efi_simple_text_output_protocol *)sys_table->con_out; + efi_call_phys2(out->output_string, out, str); +} + static void efi_printk(char *str) { char *s8; for (s8 = str; *s8; s8++) { - struct efi_simple_text_output_protocol *out; efi_char16_t ch[2] = { 0 }; ch[0] = *s8; - out = (struct efi_simple_text_output_protocol *)sys_table->con_out; - if (*s8 == '\n') { efi_char16_t nl[2] = { '\r', 0 }; - efi_call_phys2(out->output_string, out, nl); + efi_char16_printk(nl); } - efi_call_phys2(out->output_string, out, ch); + efi_char16_printk(ch); } } @@ -709,7 +714,12 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16)) break; - *p++ = *str++; + if (*str == '/') { + *p++ = '\\'; + *str++; + } else { + *p++ = *str++; + } } *p = '\0'; @@ -737,7 +747,9 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, status = efi_call_phys5(fh->open, fh, &h, filename_16, EFI_FILE_MODE_READ, (u64)0); if (status != EFI_SUCCESS) { - efi_printk("Failed to open initrd file\n"); + efi_printk("Failed to open initrd file: "); + efi_char16_printk(filename_16); + efi_printk("\n"); goto close_handles; } diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 28677c55113f..60c89f30c727 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -102,7 +102,14 @@ extern void efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); -#ifndef CONFIG_EFI +#ifdef CONFIG_EFI + +static inline bool efi_is_native(void) +{ + return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); +} + +#else /* * IF EFI is not configured, have the EFI calls return -ENOSYS. */ diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 48d9d4ea1020..992f442ca155 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -5,8 +5,6 @@ #include <asm/setup.h> #include <asm/bios_ebda.h> -#define BIOS_LOWMEM_KILOBYTES 0x413 - /* * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of @@ -16,17 +14,30 @@ * chipset: reserve a page before VGA to prevent PCI prefetch * into it (errata #56). Usually the page is reserved anyways, * unless you have no PS/2 mouse plugged in. + * + * This functions is deliberately very conservative. Losing + * memory in the bottom megabyte is rarely a problem, as long + * as we have enough memory to install the trampoline. Using + * memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem. */ + +#define BIOS_LOWMEM_KILOBYTES 0x413 +#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ +#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ + void __init reserve_ebda_region(void) { unsigned int lowmem, ebda_addr; - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ + /* + * To determine the position of the EBDA and the + * end of conventional memory, we need to look at + * the BIOS data area. In a paravirtual environment + * that area is absent. We'll just have to assume + * that the paravirt case can handle memory setup + * correctly, without our help. + */ if (paravirt_enabled()) return; @@ -37,19 +48,23 @@ void __init reserve_ebda_region(void) /* start of EBDA area */ ebda_addr = get_bios_ebda(); - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; + /* + * Note: some old Dells seem to need 4k EBDA without + * reporting so, so just consider the memory above 0x9f000 + * to be off limits (bugzilla 2990). + */ + + /* If the EBDA address is below 128K, assume it is bogus */ + if (ebda_addr < INSANE_CUTOFF) + ebda_addr = LOWMEM_CAP; - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; + /* If lowmem is less than 128K, assume it is bogus */ + if (lowmem < INSANE_CUTOFF) + lowmem = LOWMEM_CAP; - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; + /* Use the lower of the lowmem and EBDA markers as the cutoff */ + lowmem = min(lowmem, ebda_addr); + lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ /* reserve all memory between lowmem and the 1MB mark */ memblock_reserve(lowmem, 0x100000 - lowmem); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b7de3b25adb5..6859e9626442 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -48,7 +48,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) .globl startup_64 startup_64: /* - * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, + * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table * for us. These identity mapped page tables map all of the * kernel pages and possibly all of memory. @@ -159,7 +159,7 @@ startup_64: jmp 1f ENTRY(secondary_startup_64) /* - * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, + * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. * * %rsi holds a physical pointer to real_mode_data. diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index e124554598ee..3f06e6149981 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -652,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; @@ -682,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * will be the real return address, and all the rest will * point to kretprobe_trampoline. */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -701,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -728,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9c857f05cef0..e89acdf6b77b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1196,8 +1196,7 @@ void __init setup_arch(char **cmdline_p) * mismatched firmware/kernel archtectures since there is no * support for runtime services. */ - if (efi_enabled(EFI_BOOT) && - IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) { + if (efi_enabled(EFI_BOOT) && !efi_is_native()) { pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); efi_unmap_memmap(); } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4ed3edbe06bd..956ca358108a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1644,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -#define for_each_gfn_sp(kvm, sp, gfn, pos) \ - hlist_for_each_entry(sp, pos, \ +#define for_each_gfn_sp(kvm, sp, gfn) \ + hlist_for_each_entry(sp, \ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ if ((sp)->gfn != (gfn)) {} else -#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ - hlist_for_each_entry(sp, pos, \ +#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ + hlist_for_each_entry(sp, \ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ if ((sp)->gfn != (gfn) || (sp)->role.direct || \ (sp)->role.invalid) {} else @@ -1706,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_mmu_page *s; - struct hlist_node *node; LIST_HEAD(invalid_list); bool flush = false; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!s->unsync) continue; @@ -1848,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role; unsigned quadrant; struct kvm_mmu_page *sp; - struct hlist_node *node; bool need_sync = false; role = vcpu->arch.mmu.base_role; @@ -1863,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } - for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { + for_each_gfn_sp(vcpu->kvm, sp, gfn) { if (!need_sync && sp->unsync) need_sync = true; @@ -2151,14 +2149,13 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) { struct kvm_mmu_page *sp; - struct hlist_node *node; LIST_HEAD(invalid_list); int r; pgprintk("%s: looking for gfn %llx\n", __func__, gfn); r = 0; spin_lock(&kvm->mmu_lock); - for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { + for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { pgprintk("%s: gfn %llx role %x\n", __func__, gfn, sp->role.word); r = 1; @@ -2288,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_mmu_page *s; - struct hlist_node *node; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (s->unsync) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); @@ -2302,10 +2298,9 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *s; - struct hlist_node *node; bool need_unsync = false; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!can_unsync) return 1; @@ -3933,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; - struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; @@ -3964,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; - for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 2f81db40d7ca..5f2ecaf3f9d8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -69,11 +69,6 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -static inline bool efi_is_native(void) -{ - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); -} - unsigned long x86_efi_facility; /* @@ -85,7 +80,7 @@ int efi_enabled(int facility) } EXPORT_SYMBOL(efi_enabled); -static bool disable_runtime = false; +static bool __initdata disable_runtime = false; static int __init setup_noefi(char *arg) { disable_runtime = true; diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index a5c0663c2cdc..35876ffac11d 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -9,6 +9,7 @@ config XTENSA select HAVE_IDE select GENERIC_ATOMIC64 select HAVE_GENERIC_HARDIRQS + select HAVE_VIRT_TO_BUS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select MODULES_USE_ELF_RELA diff --git a/block/Kconfig b/block/Kconfig index 4a85ccf8d4cf..a7e40a7c8214 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -4,7 +4,6 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y - select PERCPU_RWSEM help Provide block layer support for the kernel. diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b8858fb0cafa..b2b9837f9dd3 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -26,11 +26,32 @@ static DEFINE_MUTEX(blkcg_pol_mutex); -struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; +struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, + .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, + struct request_queue *q, bool update_hint); + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_cgrp: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip + * subtree. + */ +#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ + cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ + if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ + (p_blkg)->q, false))) + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -112,9 +133,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->pd[i] = pd; pd->blkg = blkg; + pd->plid = i; /* invoke per-policy init */ - if (blkcg_policy_enabled(blkg->q, pol)) + if (pol->pd_init_fn) pol->pd_init_fn(blkg); } @@ -125,8 +147,19 @@ err_free: return NULL; } +/** + * __blkg_lookup - internal version of blkg_lookup() + * @blkcg: blkcg of interest + * @q: request_queue of interest + * @update_hint: whether to update lookup hint with the result or not + * + * This is internal version and shouldn't be used by policy + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of + * @q's bypass state. If @update_hint is %true, the caller should be + * holding @q->queue_lock and lookup hint is updated on success. + */ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) + struct request_queue *q, bool update_hint) { struct blkcg_gq *blkg; @@ -135,14 +168,19 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, return blkg; /* - * Hint didn't match. Look up from the radix tree. Note that we - * may not be holding queue_lock and thus are not sure whether - * @blkg from blkg_tree has already been removed or not, so we - * can't update hint to the lookup result. Leave it to the caller. + * Hint didn't match. Look up from the radix tree. Note that the + * hint can only be updated under queue_lock as otherwise @blkg + * could have already been removed from blkg_tree. The caller is + * responsible for grabbing queue_lock if @update_hint. */ blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); - if (blkg && blkg->q == q) + if (blkg && blkg->q == q) { + if (update_hint) { + lockdep_assert_held(q->queue_lock); + rcu_assign_pointer(blkcg->blkg_hint, blkg); + } return blkg; + } return NULL; } @@ -162,7 +200,7 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) if (unlikely(blk_queue_bypass(q))) return NULL; - return __blkg_lookup(blkcg, q); + return __blkg_lookup(blkcg, q, false); } EXPORT_SYMBOL_GPL(blkg_lookup); @@ -170,75 +208,129 @@ EXPORT_SYMBOL_GPL(blkg_lookup); * If @new_blkg is %NULL, this function tries to allocate a new one as * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. */ -static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q, - struct blkcg_gq *new_blkg) +static struct blkcg_gq *blkg_create(struct blkcg *blkcg, + struct request_queue *q, + struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; - int ret; + int i, ret; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); - /* lookup and update hint on success, see __blkg_lookup() for details */ - blkg = __blkg_lookup(blkcg, q); - if (blkg) { - rcu_assign_pointer(blkcg->blkg_hint, blkg); - goto out_free; - } - /* blkg holds a reference to blkcg */ if (!css_tryget(&blkcg->css)) { - blkg = ERR_PTR(-EINVAL); - goto out_free; + ret = -EINVAL; + goto err_free_blkg; } /* allocate */ if (!new_blkg) { new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); if (unlikely(!new_blkg)) { - blkg = ERR_PTR(-ENOMEM); - goto out_put; + ret = -ENOMEM; + goto err_put_css; } } blkg = new_blkg; - /* insert */ + /* link parent and insert */ + if (blkcg_parent(blkcg)) { + blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); + if (WARN_ON_ONCE(!blkg->parent)) { + blkg = ERR_PTR(-EINVAL); + goto err_put_css; + } + blkg_get(blkg->parent); + } + spin_lock(&blkcg->lock); ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); + + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_online_fn) + pol->pd_online_fn(blkg); + } } + blkg->online = true; spin_unlock(&blkcg->lock); if (!ret) return blkg; - blkg = ERR_PTR(ret); -out_put: + /* @blkg failed fully initialized, use the usual release path */ + blkg_put(blkg); + return ERR_PTR(ret); + +err_put_css: css_put(&blkcg->css); -out_free: +err_free_blkg: blkg_free(new_blkg); - return blkg; + return ERR_PTR(ret); } +/** + * blkg_lookup_create - lookup blkg, try to create one if not there + * @blkcg: blkcg of interest + * @q: request_queue of interest + * + * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to + * create one. blkg creation is performed recursively from blkcg_root such + * that all non-root blkg's have access to the parent blkg. This function + * should be called under RCU read lock and @q->queue_lock. + * + * Returns pointer to the looked up or created blkg on success, ERR_PTR() + * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not + * dead and bypassing, returns ERR_PTR(-EBUSY). + */ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q) { + struct blkcg_gq *blkg; + + WARN_ON_ONCE(!rcu_read_lock_held()); + lockdep_assert_held(q->queue_lock); + /* * This could be the first entry point of blkcg implementation and * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); - return __blkg_lookup_create(blkcg, q, NULL); + + blkg = __blkg_lookup(blkcg, q, true); + if (blkg) + return blkg; + + /* + * Create blkgs walking down from blkcg_root to @blkcg, so that all + * non-root blkgs have access to their parents. + */ + while (true) { + struct blkcg *pos = blkcg; + struct blkcg *parent = blkcg_parent(blkcg); + + while (parent && !__blkg_lookup(parent, q, false)) { + pos = parent; + parent = blkcg_parent(parent); + } + + blkg = blkg_create(pos, q, NULL); + if (pos == blkcg || IS_ERR(blkg)) + return blkg; + } } EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; + int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -247,6 +339,14 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_offline_fn) + pol->pd_offline_fn(blkg); + } + blkg->online = false; + radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); @@ -301,8 +401,10 @@ static void blkg_rcu_free(struct rcu_head *rcu_head) void __blkg_release(struct blkcg_gq *blkg) { - /* release the extra blkcg reference this blkg has been holding */ + /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); /* * A group is freed in rcu manner. But having an rcu lock does not @@ -357,7 +459,6 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, { struct blkcg *blkcg = cgroup_to_blkcg(cgroup); struct blkcg_gq *blkg; - struct hlist_node *n; int i; mutex_lock(&blkcg_pol_mutex); @@ -368,7 +469,7 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, * stat updates. This is a debug feature which shouldn't exist * anyway. If you get hit by a race, retry. */ - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -402,8 +503,9 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg) * * This function invokes @prfill on each blkg of @blkcg if pd for the * policy specified by @pol exists. @prfill is invoked with @sf, the - * policy data and @data. If @show_total is %true, the sum of the return - * values from @prfill is printed with "Total" label at the end. + * policy data and @data and the matching queue lock held. If @show_total + * is %true, the sum of the return values from @prfill is printed with + * "Total" label at the end. * * This is to be used to construct print functions for * cftype->read_seq_string method. @@ -415,14 +517,16 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, bool show_total) { struct blkcg_gq *blkg; - struct hlist_node *n; u64 total = 0; - spin_lock_irq(&blkcg->lock); - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) + rcu_read_lock(); + hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { + spin_lock_irq(blkg->q->queue_lock); if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkcg->lock); + spin_unlock_irq(blkg->q->queue_lock); + } + rcu_read_unlock(); if (show_total) seq_printf(sf, "Total %llu\n", (unsigned long long)total); @@ -481,6 +585,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); return v; } +EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); /** * blkg_prfill_stat - prfill callback for blkg_stat @@ -514,6 +619,82 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); /** + * blkg_stat_recursive_sum - collect hierarchical blkg_stat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_stat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup *pos_cgrp; + u64 sum; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + sum = blkg_stat_read((void *)pd + off); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_stat *stat = (void *)pos_pd + off; + + if (pos_blkg->online) + sum += blkg_stat_read(stat); + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); + +/** + * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_rwstat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup *pos_cgrp; + struct blkg_rwstat sum; + int i; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + sum = blkg_rwstat_read((void *)pd + off); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_rwstat *rwstat = (void *)pos_pd + off; + struct blkg_rwstat tmp; + + if (!pos_blkg->online) + continue; + + tmp = blkg_rwstat_read(rwstat); + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + sum.cnt[i] += tmp.cnt[i]; + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); + +/** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup * @pol: target policy @@ -658,6 +839,7 @@ static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) return ERR_PTR(-ENOMEM); blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; + blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); @@ -777,7 +959,7 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - struct blkcg_gq *blkg; + struct blkcg_gq *blkg, *new_blkg; struct blkg_policy_data *pd, *n; int cnt = 0, ret; bool preloaded; @@ -786,19 +968,27 @@ int blkcg_activate_policy(struct request_queue *q, return 0; /* preallocations for root blkg */ - blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); - if (!blkg) + new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); + if (!new_blkg) return -ENOMEM; preloaded = !radix_tree_preload(GFP_KERNEL); blk_queue_bypass_start(q); - /* make sure the root blkg exists and count the existing blkgs */ + /* + * Make sure the root blkg exists and count the existing blkgs. As + * @q is bypassing at this point, blkg_lookup_create() can't be + * used. Open code it. + */ spin_lock_irq(q->queue_lock); rcu_read_lock(); - blkg = __blkg_lookup_create(&blkcg_root, q, blkg); + blkg = __blkg_lookup(&blkcg_root, q, false); + if (blkg) + blkg_free(new_blkg); + else + blkg = blkg_create(&blkcg_root, q, new_blkg); rcu_read_unlock(); if (preloaded) @@ -846,6 +1036,7 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; + pd->plid = pol->plid; pol->pd_init_fn(blkg); spin_unlock(&blkg->blkcg->lock); @@ -892,6 +1083,8 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); + if (pol->pd_offline_fn) + pol->pd_offline_fn(blkg); if (pol->pd_exit_fn) pol->pd_exit_fn(blkg); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 24597309e23d..f2b292925ccd 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -54,6 +54,7 @@ struct blkcg { /* TODO: per-policy storage in blkcg */ unsigned int cfq_weight; /* belongs to cfq */ + unsigned int cfq_leaf_weight; }; struct blkg_stat { @@ -80,8 +81,9 @@ struct blkg_rwstat { * beginning and pd_size can't be smaller than pd. */ struct blkg_policy_data { - /* the blkg this per-policy data belongs to */ + /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; + int plid; /* used during policy activation */ struct list_head alloc_node; @@ -94,17 +96,27 @@ struct blkcg_gq { struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + /* request allocation list for this blkcg-q pair */ struct request_list rl; + /* reference count */ int refcnt; + /* is this blkg online? protected by both blkcg and q locks */ + bool online; + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; }; typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); @@ -117,6 +129,8 @@ struct blkcg_policy { /* operations */ blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_online_pd_fn *pd_online_fn; + blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_exit_pd_fn *pd_exit_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; }; @@ -150,6 +164,10 @@ u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off); + struct blkg_conf_ctx { struct gendisk *disk; struct blkcg_gq *blkg; @@ -181,6 +199,19 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) } /** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + struct cgroup *pcg = blkcg->css.cgroup->parent; + + return pcg ? cgroup_to_blkcg(pcg) : NULL; +} + +/** * blkg_to_pdata - get policy private data * @blkg: blkg of interest * @pol: policy of interest @@ -387,6 +418,18 @@ static inline void blkg_stat_reset(struct blkg_stat *stat) } /** + * blkg_stat_merge - merge a blkg_stat into another + * @to: the destination blkg_stat + * @from: the source + * + * Add @from's count to @to. + */ +static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) +{ + blkg_stat_add(to, blkg_stat_read(from)); +} + +/** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat * @rw: mask of REQ_{WRITE|SYNC} @@ -434,14 +477,14 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) } /** - * blkg_rwstat_sum - read the total count of a blkg_rwstat + * blkg_rwstat_total - read the total count of a blkg_rwstat * @rwstat: blkg_rwstat to read * * Return the total count of @rwstat regardless of the IO direction. This * function can be called without synchronization and takes care of u64 * atomicity. */ -static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat) +static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); @@ -457,6 +500,25 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); } +/** + * blkg_rwstat_merge - merge a blkg_rwstat into another + * @to: the destination blkg_rwstat + * @from: the source + * + * Add @from's counts to @to. + */ +static inline void blkg_rwstat_merge(struct blkg_rwstat *to, + struct blkg_rwstat *from) +{ + struct blkg_rwstat v = blkg_rwstat_read(from); + int i; + + u64_stats_update_begin(&to->syncp); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + to->cnt[i] += v.cnt[i]; + u64_stats_update_end(&to->syncp); +} + #else /* CONFIG_BLK_CGROUP */ struct cgroup; diff --git a/block/blk-core.c b/block/blk-core.c index 277134cb5d32..074b758efc42 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,7 +39,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); -EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); @@ -1348,7 +1347,7 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, if (!ll_back_merge_fn(q, req, bio)) return false; - trace_block_bio_backmerge(q, bio); + trace_block_bio_backmerge(q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -1370,7 +1369,7 @@ static bool bio_attempt_front_merge(struct request_queue *q, if (!ll_front_merge_fn(q, req, bio)) return false; - trace_block_bio_frontmerge(q, bio); + trace_block_bio_frontmerge(q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -1553,13 +1552,6 @@ get_rq: if (list_empty(&plug->list)) trace_block_plug(q); else { - if (!plug->should_sort) { - struct request *__rq; - - __rq = list_entry_rq(plug->list.prev); - if (__rq->q != q) - plug->should_sort = 1; - } if (request_count >= BLK_MAX_REQUEST_COUNT) { blk_flush_plug_list(plug, false); trace_block_plug(q); @@ -2890,7 +2882,6 @@ void blk_start_plug(struct blk_plug *plug) plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); - plug->should_sort = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2992,10 +2983,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) list_splice_init(&plug->list, &list); - if (plug->should_sort) { - list_sort(NULL, &list, plug_rq_cmp); - plug->should_sort = 0; - } + list_sort(NULL, &list, plug_rq_cmp); q = NULL; depth = 0; diff --git a/block/blk-exec.c b/block/blk-exec.c index c88202f973d9..e70621396129 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -121,9 +121,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, /* Prevent hang_check timer from firing at us during very long I/O */ hang_check = sysctl_hung_task_timeout_secs; if (hang_check) - while (!wait_for_completion_timeout(&wait, hang_check * (HZ/2))); + while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); else - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (rq->errors) err = -EIO; diff --git a/block/blk-flush.c b/block/blk-flush.c index 720ad607ff91..db8f1b507857 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -436,7 +436,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio_get(bio); submit_bio(WRITE_FLUSH, bio); - wait_for_completion(&wait); + wait_for_completion_io(&wait); /* * The driver must store the error location in ->bi_sector, if diff --git a/block/blk-ioc.c b/block/blk-ioc.c index fab4cdd3f7bb..9c4bb8266bc8 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -164,7 +164,6 @@ EXPORT_SYMBOL(put_io_context); */ void put_io_context_active(struct io_context *ioc) { - struct hlist_node *n; unsigned long flags; struct io_cq *icq; @@ -180,7 +179,7 @@ void put_io_context_active(struct io_context *ioc) */ retry: spin_lock_irqsave_nested(&ioc->lock, flags, 1); - hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) { + hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) { if (icq->flags & ICQ_EXITED) continue; if (spin_trylock(icq->q->queue_lock)) { diff --git a/block/blk-lib.c b/block/blk-lib.c index b3a1f2b70b31..d6f50d572565 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -126,7 +126,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) ret = -EIO; @@ -200,7 +200,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) ret = -ENOTSUPP; @@ -262,7 +262,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) /* One of bios in the batch was completed with error.*/ diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 788147797a79..6206a934eb8c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -497,6 +497,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, return res; } +static void blk_free_queue_rcu(struct rcu_head *rcu_head) +{ + struct request_queue *q = container_of(rcu_head, struct request_queue, + rcu_head); + kmem_cache_free(blk_requestq_cachep, q); +} + /** * blk_release_queue: - release a &struct request_queue when it is no longer needed * @kobj: the kobj belonging to the request queue to be released @@ -538,7 +545,7 @@ static void blk_release_queue(struct kobject *kobj) bdi_destroy(&q->backing_dev_info); ida_simple_remove(&blk_queue_ida, q->id); - kmem_cache_free(blk_requestq_cachep, q); + call_rcu(&q->rcu_head, blk_free_queue_rcu); } static const struct sysfs_ops queue_sysfs_ops = { diff --git a/block/blk.h b/block/blk.h index 47fdfdd41520..e837b8f619b7 100644 --- a/block/blk.h +++ b/block/blk.h @@ -61,7 +61,7 @@ static inline void blk_clear_rq_complete(struct request *rq) /* * Internal elevator interface */ -#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +#define ELV_ON_HASH(rq) hash_hashed(&(rq)->hash) void blk_insert_flush(struct request *rq); void blk_abort_flushes(struct request_queue *q); diff --git a/block/bsg.c b/block/bsg.c index ff64ae3bacee..420a5a9f1b23 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -800,11 +800,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode, static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) { struct bsg_device *bd; - struct hlist_node *entry; mutex_lock(&bsg_mutex); - hlist_for_each_entry(bd, entry, bsg_dev_idx_hash(minor), dev_list) { + hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { if (bd->queue == q) { atomic_inc(&bd->ref_count); goto found; @@ -997,7 +996,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, { struct bsg_class_device *bcd; dev_t dev; - int ret, minor; + int ret; struct device *class_dev = NULL; const char *devname; @@ -1017,23 +1016,16 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, mutex_lock(&bsg_mutex); - ret = idr_pre_get(&bsg_minor_idr, GFP_KERNEL); - if (!ret) { - ret = -ENOMEM; - goto unlock; - } - - ret = idr_get_new(&bsg_minor_idr, bcd, &minor); - if (ret < 0) + ret = idr_alloc(&bsg_minor_idr, bcd, 0, BSG_MAX_DEVS, GFP_KERNEL); + if (ret < 0) { + if (ret == -ENOSPC) { + printk(KERN_ERR "bsg: too many bsg devices\n"); + ret = -EINVAL; + } goto unlock; - - if (minor >= BSG_MAX_DEVS) { - printk(KERN_ERR "bsg: too many bsg devices\n"); - ret = -EINVAL; - goto remove_idr; } - bcd->minor = minor; + bcd->minor = ret; bcd->queue = q; bcd->parent = get_device(parent); bcd->release = release; @@ -1059,8 +1051,7 @@ unregister_class_dev: device_unregister(class_dev); put_dev: put_device(parent); -remove_idr: - idr_remove(&bsg_minor_idr, minor); + idr_remove(&bsg_minor_idr, bcd->minor); unlock: mutex_unlock(&bsg_mutex); return ret; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e62e9205b80a..4f0ade74cfd0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -85,7 +85,6 @@ struct cfq_rb_root { struct rb_root rb; struct rb_node *left; unsigned count; - unsigned total_weight; u64 min_vdisktime; struct cfq_ttime ttime; }; @@ -155,7 +154,7 @@ struct cfq_queue { * First index in the service_trees. * IDLE is handled separately, so it has negative index */ -enum wl_prio_t { +enum wl_class_t { BE_WORKLOAD = 0, RT_WORKLOAD = 1, IDLE_WORKLOAD = 2, @@ -223,10 +222,45 @@ struct cfq_group { /* group service_tree key */ u64 vdisktime; + + /* + * The number of active cfqgs and sum of their weights under this + * cfqg. This covers this cfqg's leaf_weight and all children's + * weights, but does not cover weights of further descendants. + * + * If a cfqg is on the service tree, it's active. An active cfqg + * also activates its parent and contributes to the children_weight + * of the parent. + */ + int nr_active; + unsigned int children_weight; + + /* + * vfraction is the fraction of vdisktime that the tasks in this + * cfqg are entitled to. This is determined by compounding the + * ratios walking up from this cfqg to the root. + * + * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all + * vfractions on a service tree is approximately 1. The sum may + * deviate a bit due to rounding errors and fluctuations caused by + * cfqgs entering and leaving the service tree. + */ + unsigned int vfraction; + + /* + * There are two weights - (internal) weight is the weight of this + * cfqg against the sibling cfqgs. leaf_weight is the wight of + * this cfqg against the child cfqgs. For the root cfqg, both + * weights are kept in sync for backward compatibility. + */ unsigned int weight; unsigned int new_weight; unsigned int dev_weight; + unsigned int leaf_weight; + unsigned int new_leaf_weight; + unsigned int dev_leaf_weight; + /* number of cfqq currently on this group */ int nr_cfqq; @@ -248,14 +282,15 @@ struct cfq_group { struct cfq_rb_root service_trees[2][3]; struct cfq_rb_root service_tree_idle; - unsigned long saved_workload_slice; - enum wl_type_t saved_workload; - enum wl_prio_t saved_serving_prio; + unsigned long saved_wl_slice; + enum wl_type_t saved_wl_type; + enum wl_class_t saved_wl_class; /* number of requests that are on the dispatch list or inside driver */ int dispatched; struct cfq_ttime ttime; - struct cfqg_stats stats; + struct cfqg_stats stats; /* stats for this cfqg */ + struct cfqg_stats dead_stats; /* stats pushed from dead children */ }; struct cfq_io_cq { @@ -280,8 +315,8 @@ struct cfq_data { /* * The priority currently being served */ - enum wl_prio_t serving_prio; - enum wl_type_t serving_type; + enum wl_class_t serving_wl_class; + enum wl_type_t serving_wl_type; unsigned long workload_expires; struct cfq_group *serving_group; @@ -353,17 +388,17 @@ struct cfq_data { static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); -static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, - enum wl_prio_t prio, +static struct cfq_rb_root *st_for(struct cfq_group *cfqg, + enum wl_class_t class, enum wl_type_t type) { if (!cfqg) return NULL; - if (prio == IDLE_WORKLOAD) + if (class == IDLE_WORKLOAD) return &cfqg->service_tree_idle; - return &cfqg->service_trees[prio][type]; + return &cfqg->service_trees[class][type]; } enum cfqq_state_flags { @@ -502,7 +537,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { struct cfqg_stats *stats = &cfqg->stats; - if (blkg_rwstat_sum(&stats->queued)) + if (blkg_rwstat_total(&stats->queued)) return; /* @@ -546,7 +581,7 @@ static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) struct cfqg_stats *stats = &cfqg->stats; blkg_stat_add(&stats->avg_queue_size_sum, - blkg_rwstat_sum(&stats->queued)); + blkg_rwstat_total(&stats->queued)); blkg_stat_add(&stats->avg_queue_size_samples, 1); cfqg_stats_update_group_wait_time(stats); } @@ -572,6 +607,13 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); } +static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) +{ + struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent; + + return pblkg ? blkg_to_cfqg(pblkg) : NULL; +} + static inline void cfqg_get(struct cfq_group *cfqg) { return blkg_get(cfqg_to_blkg(cfqg)); @@ -586,8 +628,9 @@ static inline void cfqg_put(struct cfq_group *cfqg) char __pbuf[128]; \ \ blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ - cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt, (cfqq)->pid, \ + cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\ __pbuf, ##args); \ } while (0) @@ -646,11 +689,9 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, io_start_time - start_time); } -static void cfq_pd_reset_stats(struct blkcg_gq *blkg) +/* @stats = 0 */ +static void cfqg_stats_reset(struct cfqg_stats *stats) { - struct cfq_group *cfqg = blkg_to_cfqg(blkg); - struct cfqg_stats *stats = &cfqg->stats; - /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->service_bytes); blkg_rwstat_reset(&stats->serviced); @@ -669,13 +710,58 @@ static void cfq_pd_reset_stats(struct blkcg_gq *blkg) #endif } +/* @to += @from */ +static void cfqg_stats_merge(struct cfqg_stats *to, struct cfqg_stats *from) +{ + /* queued stats shouldn't be cleared */ + blkg_rwstat_merge(&to->service_bytes, &from->service_bytes); + blkg_rwstat_merge(&to->serviced, &from->serviced); + blkg_rwstat_merge(&to->merged, &from->merged); + blkg_rwstat_merge(&to->service_time, &from->service_time); + blkg_rwstat_merge(&to->wait_time, &from->wait_time); + blkg_stat_merge(&from->time, &from->time); +#ifdef CONFIG_DEBUG_BLK_CGROUP + blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time); + blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum); + blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples); + blkg_stat_merge(&to->dequeue, &from->dequeue); + blkg_stat_merge(&to->group_wait_time, &from->group_wait_time); + blkg_stat_merge(&to->idle_time, &from->idle_time); + blkg_stat_merge(&to->empty_time, &from->empty_time); +#endif +} + +/* + * Transfer @cfqg's stats to its parent's dead_stats so that the ancestors' + * recursive stats can still account for the amount used by this cfqg after + * it's gone. + */ +static void cfqg_stats_xfer_dead(struct cfq_group *cfqg) +{ + struct cfq_group *parent = cfqg_parent(cfqg); + + lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock); + + if (unlikely(!parent)) + return; + + cfqg_stats_merge(&parent->dead_stats, &cfqg->stats); + cfqg_stats_merge(&parent->dead_stats, &cfqg->dead_stats); + cfqg_stats_reset(&cfqg->stats); + cfqg_stats_reset(&cfqg->dead_stats); +} + #else /* CONFIG_CFQ_GROUP_IOSCHED */ +static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; } static inline void cfqg_get(struct cfq_group *cfqg) { } static inline void cfqg_put(struct cfq_group *cfqg) { } #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ - blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) + blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt, (cfqq)->pid, \ + cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\ + ##args) #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, @@ -732,7 +818,7 @@ static inline bool iops_mode(struct cfq_data *cfqd) return false; } -static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) +static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq) { if (cfq_class_idle(cfqq)) return IDLE_WORKLOAD; @@ -751,23 +837,23 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) return SYNC_WORKLOAD; } -static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, +static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class, struct cfq_data *cfqd, struct cfq_group *cfqg) { - if (wl == IDLE_WORKLOAD) + if (wl_class == IDLE_WORKLOAD) return cfqg->service_tree_idle.count; - return cfqg->service_trees[wl][ASYNC_WORKLOAD].count - + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count - + cfqg->service_trees[wl][SYNC_WORKLOAD].count; + return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count + + cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count + + cfqg->service_trees[wl_class][SYNC_WORKLOAD].count; } static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, struct cfq_group *cfqg) { - return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count - + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; + return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count + + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; } static void cfq_dispatch_insert(struct request_queue *, struct request *); @@ -847,13 +933,27 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); } -static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) +/** + * cfqg_scale_charge - scale disk time charge according to cfqg weight + * @charge: disk time being charged + * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT + * + * Scale @charge according to @vfraction, which is in range (0, 1]. The + * scaling is inversely proportional. + * + * scaled = charge / vfraction + * + * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. + */ +static inline u64 cfqg_scale_charge(unsigned long charge, + unsigned int vfraction) { - u64 d = delta << CFQ_SERVICE_SHIFT; + u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ - d = d * CFQ_WEIGHT_DEFAULT; - do_div(d, cfqg->weight); - return d; + /* charge / vfraction */ + c <<= CFQ_SERVICE_SHIFT; + do_div(c, vfraction); + return c; } static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) @@ -909,9 +1009,7 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, static inline unsigned cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) { - struct cfq_rb_root *st = &cfqd->grp_service_tree; - - return cfqd->cfq_target_latency * cfqg->weight / st->total_weight; + return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; } static inline unsigned @@ -1178,20 +1276,61 @@ static void cfq_update_group_weight(struct cfq_group *cfqg) { BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); + if (cfqg->new_weight) { cfqg->weight = cfqg->new_weight; cfqg->new_weight = 0; } + + if (cfqg->new_leaf_weight) { + cfqg->leaf_weight = cfqg->new_leaf_weight; + cfqg->new_leaf_weight = 0; + } } static void cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) { + unsigned int vfr = 1 << CFQ_SERVICE_SHIFT; /* start with 1 */ + struct cfq_group *pos = cfqg; + struct cfq_group *parent; + bool propagate; + + /* add to the service tree */ BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); cfq_update_group_weight(cfqg); __cfq_group_service_tree_add(st, cfqg); - st->total_weight += cfqg->weight; + + /* + * Activate @cfqg and calculate the portion of vfraction @cfqg is + * entitled to. vfraction is calculated by walking the tree + * towards the root calculating the fraction it has at each level. + * The compounded ratio is how much vfraction @cfqg owns. + * + * Start with the proportion tasks in this cfqg has against active + * children cfqgs - its leaf_weight against children_weight. + */ + propagate = !pos->nr_active++; + pos->children_weight += pos->leaf_weight; + vfr = vfr * pos->leaf_weight / pos->children_weight; + + /* + * Compound ->weight walking up the tree. Both activation and + * vfraction calculation are done in the same loop. Propagation + * stops once an already activated node is met. vfraction + * calculation should always continue to the root. + */ + while ((parent = cfqg_parent(pos))) { + if (propagate) { + propagate = !parent->nr_active++; + parent->children_weight += pos->weight; + } + vfr = vfr * pos->weight / parent->children_weight; + pos = parent; + } + + cfqg->vfraction = max_t(unsigned, vfr, 1); } static void @@ -1222,7 +1361,32 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) static void cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) { - st->total_weight -= cfqg->weight; + struct cfq_group *pos = cfqg; + bool propagate; + + /* + * Undo activation from cfq_group_service_tree_add(). Deactivate + * @cfqg and propagate deactivation upwards. + */ + propagate = !--pos->nr_active; + pos->children_weight -= pos->leaf_weight; + + while (propagate) { + struct cfq_group *parent = cfqg_parent(pos); + + /* @pos has 0 nr_active at this point */ + WARN_ON_ONCE(pos->children_weight); + pos->vfraction = 0; + + if (!parent) + break; + + propagate = !--parent->nr_active; + parent->children_weight -= pos->weight; + pos = parent; + } + + /* remove from the service tree */ if (!RB_EMPTY_NODE(&cfqg->rb_node)) cfq_rb_erase(&cfqg->rb_node, st); } @@ -1241,7 +1405,7 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); cfq_group_service_tree_del(st, cfqg); - cfqg->saved_workload_slice = 0; + cfqg->saved_wl_slice = 0; cfqg_stats_update_dequeue(cfqg); } @@ -1284,6 +1448,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, unsigned int used_sl, charge, unaccounted_sl = 0; int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) - cfqg->service_tree_idle.count; + unsigned int vfr; BUG_ON(nr_sync < 0); used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); @@ -1293,20 +1458,25 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, else if (!cfq_cfqq_sync(cfqq) && !nr_sync) charge = cfqq->allocated_slice; - /* Can't update vdisktime while group is on service tree */ + /* + * Can't update vdisktime while on service tree and cfqg->vfraction + * is valid only while on it. Cache vfr, leave the service tree, + * update vdisktime and go back on. The re-addition to the tree + * will also update the weights as necessary. + */ + vfr = cfqg->vfraction; cfq_group_service_tree_del(st, cfqg); - cfqg->vdisktime += cfq_scale_slice(charge, cfqg); - /* If a new weight was requested, update now, off tree */ + cfqg->vdisktime += cfqg_scale_charge(charge, vfr); cfq_group_service_tree_add(st, cfqg); /* This group is being expired. Save the context */ if (time_after(cfqd->workload_expires, jiffies)) { - cfqg->saved_workload_slice = cfqd->workload_expires + cfqg->saved_wl_slice = cfqd->workload_expires - jiffies; - cfqg->saved_workload = cfqd->serving_type; - cfqg->saved_serving_prio = cfqd->serving_prio; + cfqg->saved_wl_type = cfqd->serving_wl_type; + cfqg->saved_wl_class = cfqd->serving_wl_class; } else - cfqg->saved_workload_slice = 0; + cfqg->saved_wl_slice = 0; cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); @@ -1344,6 +1514,52 @@ static void cfq_pd_init(struct blkcg_gq *blkg) cfq_init_cfqg_base(cfqg); cfqg->weight = blkg->blkcg->cfq_weight; + cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; +} + +static void cfq_pd_offline(struct blkcg_gq *blkg) +{ + /* + * @blkg is going offline and will be ignored by + * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so + * that they don't get lost. If IOs complete after this point, the + * stats for them will be lost. Oh well... + */ + cfqg_stats_xfer_dead(blkg_to_cfqg(blkg)); +} + +/* offset delta from cfqg->stats to cfqg->dead_stats */ +static const int dead_stats_off_delta = offsetof(struct cfq_group, dead_stats) - + offsetof(struct cfq_group, stats); + +/* to be used by recursive prfill, sums live and dead stats recursively */ +static u64 cfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off) +{ + u64 sum = 0; + + sum += blkg_stat_recursive_sum(pd, off); + sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta); + return sum; +} + +/* to be used by recursive prfill, sums live and dead rwstats recursively */ +static struct blkg_rwstat cfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, + int off) +{ + struct blkg_rwstat a, b; + + a = blkg_rwstat_recursive_sum(pd, off); + b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta); + blkg_rwstat_merge(&a, &b); + return a; +} + +static void cfq_pd_reset_stats(struct blkcg_gq *blkg) +{ + struct cfq_group *cfqg = blkg_to_cfqg(blkg); + + cfqg_stats_reset(&cfqg->stats); + cfqg_stats_reset(&cfqg->dead_stats); } /* @@ -1400,6 +1616,26 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, return 0; } +static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct cfq_group *cfqg = pd_to_cfqg(pd); + + if (!cfqg->dev_leaf_weight) + return 0; + return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); +} + +static int cfqg_print_leaf_weight_device(struct cgroup *cgrp, + struct cftype *cft, + struct seq_file *sf) +{ + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), + cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0, + false); + return 0; +} + static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { @@ -1407,8 +1643,16 @@ static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, return 0; } -static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + seq_printf(sf, "%u\n", + cgroup_to_blkcg(cgrp)->cfq_leaf_weight); + return 0; +} + +static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf, bool is_leaf_weight) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; @@ -1422,8 +1666,13 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, ret = -EINVAL; cfqg = blkg_to_cfqg(ctx.blkg); if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { - cfqg->dev_weight = ctx.v; - cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; + if (!is_leaf_weight) { + cfqg->dev_weight = ctx.v; + cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; + } else { + cfqg->dev_leaf_weight = ctx.v; + cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; + } ret = 0; } @@ -1431,29 +1680,63 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, return ret; } -static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return __cfqg_set_weight_device(cgrp, cft, buf, false); +} + +static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return __cfqg_set_weight_device(cgrp, cft, buf, true); +} + +static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val, + bool is_leaf_weight) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkcg_gq *blkg; - struct hlist_node *n; if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX) return -EINVAL; spin_lock_irq(&blkcg->lock); - blkcg->cfq_weight = (unsigned int)val; - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + if (!is_leaf_weight) + blkcg->cfq_weight = val; + else + blkcg->cfq_leaf_weight = val; + + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); - if (cfqg && !cfqg->dev_weight) - cfqg->new_weight = blkcg->cfq_weight; + if (!cfqg) + continue; + + if (!is_leaf_weight) { + if (!cfqg->dev_weight) + cfqg->new_weight = blkcg->cfq_weight; + } else { + if (!cfqg->dev_leaf_weight) + cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; + } } spin_unlock_irq(&blkcg->lock); return 0; } +static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + return __cfq_set_weight(cgrp, cft, val, false); +} + +static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + return __cfq_set_weight(cgrp, cft, val, true); +} + static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { @@ -1474,6 +1757,42 @@ static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } +static u64 cfqg_prfill_stat_recursive(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 sum = cfqg_stat_pd_recursive_sum(pd, off); + + return __blkg_prfill_u64(sf, pd, sum); +} + +static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct blkg_rwstat sum = cfqg_rwstat_pd_recursive_sum(pd, off); + + return __blkg_prfill_rwstat(sf, pd, &sum); +} + +static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); + + blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, + &blkcg_policy_cfq, cft->private, false); + return 0; +} + +static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); + + blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, + &blkcg_policy_cfq, cft->private, true); + return 0; +} + #ifdef CONFIG_DEBUG_BLK_CGROUP static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) @@ -1503,17 +1822,49 @@ static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, #endif /* CONFIG_DEBUG_BLK_CGROUP */ static struct cftype cfq_blkcg_files[] = { + /* on root, weight is mapped to leaf_weight */ + { + .name = "weight_device", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cfqg_print_leaf_weight_device, + .write_string = cfqg_set_leaf_weight_device, + .max_write_len = 256, + }, + { + .name = "weight", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cfq_print_leaf_weight, + .write_u64 = cfq_set_leaf_weight, + }, + + /* no such mapping necessary for !roots */ { .name = "weight_device", + .flags = CFTYPE_NOT_ON_ROOT, .read_seq_string = cfqg_print_weight_device, .write_string = cfqg_set_weight_device, .max_write_len = 256, }, { .name = "weight", + .flags = CFTYPE_NOT_ON_ROOT, .read_seq_string = cfq_print_weight, .write_u64 = cfq_set_weight, }, + + { + .name = "leaf_weight_device", + .read_seq_string = cfqg_print_leaf_weight_device, + .write_string = cfqg_set_leaf_weight_device, + .max_write_len = 256, + }, + { + .name = "leaf_weight", + .read_seq_string = cfq_print_leaf_weight, + .write_u64 = cfq_set_leaf_weight, + }, + + /* statistics, covers only the tasks in the cfqg */ { .name = "time", .private = offsetof(struct cfq_group, stats.time), @@ -1554,6 +1905,48 @@ static struct cftype cfq_blkcg_files[] = { .private = offsetof(struct cfq_group, stats.queued), .read_seq_string = cfqg_print_rwstat, }, + + /* the same statictics which cover the cfqg and its descendants */ + { + .name = "time_recursive", + .private = offsetof(struct cfq_group, stats.time), + .read_seq_string = cfqg_print_stat_recursive, + }, + { + .name = "sectors_recursive", + .private = offsetof(struct cfq_group, stats.sectors), + .read_seq_string = cfqg_print_stat_recursive, + }, + { + .name = "io_service_bytes_recursive", + .private = offsetof(struct cfq_group, stats.service_bytes), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_serviced_recursive", + .private = offsetof(struct cfq_group, stats.serviced), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_service_time_recursive", + .private = offsetof(struct cfq_group, stats.service_time), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_wait_time_recursive", + .private = offsetof(struct cfq_group, stats.wait_time), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_merged_recursive", + .private = offsetof(struct cfq_group, stats.merged), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_queued_recursive", + .private = offsetof(struct cfq_group, stats.queued), + .read_seq_string = cfqg_print_rwstat_recursive, + }, #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", @@ -1612,15 +2005,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct rb_node **p, *parent; struct cfq_queue *__cfqq; unsigned long rb_key; - struct cfq_rb_root *service_tree; + struct cfq_rb_root *st; int left; int new_cfqq = 1; - service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), - cfqq_type(cfqq)); + st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); if (cfq_class_idle(cfqq)) { rb_key = CFQ_IDLE_DELAY; - parent = rb_last(&service_tree->rb); + parent = rb_last(&st->rb); if (parent && parent != &cfqq->rb_node) { __cfqq = rb_entry(parent, struct cfq_queue, rb_node); rb_key += __cfqq->rb_key; @@ -1638,7 +2030,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->slice_resid = 0; } else { rb_key = -HZ; - __cfqq = cfq_rb_first(service_tree); + __cfqq = cfq_rb_first(st); rb_key += __cfqq ? __cfqq->rb_key : jiffies; } @@ -1647,8 +2039,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * same position, nothing more to do */ - if (rb_key == cfqq->rb_key && - cfqq->service_tree == service_tree) + if (rb_key == cfqq->rb_key && cfqq->service_tree == st) return; cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); @@ -1657,11 +2048,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, left = 1; parent = NULL; - cfqq->service_tree = service_tree; - p = &service_tree->rb.rb_node; + cfqq->service_tree = st; + p = &st->rb.rb_node; while (*p) { - struct rb_node **n; - parent = *p; __cfqq = rb_entry(parent, struct cfq_queue, rb_node); @@ -1669,22 +2058,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, * sort by key, that represents service time. */ if (time_before(rb_key, __cfqq->rb_key)) - n = &(*p)->rb_left; + p = &parent->rb_left; else { - n = &(*p)->rb_right; + p = &parent->rb_right; left = 0; } - - p = n; } if (left) - service_tree->left = &cfqq->rb_node; + st->left = &cfqq->rb_node; cfqq->rb_key = rb_key; rb_link_node(&cfqq->rb_node, parent, p); - rb_insert_color(&cfqq->rb_node, &service_tree->rb); - service_tree->count++; + rb_insert_color(&cfqq->rb_node, &st->rb); + st->count++; if (add_front || !new_cfqq) return; cfq_group_notify_queue_add(cfqd, cfqq->cfqg); @@ -2030,8 +2417,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (cfqq) { - cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", - cfqd->serving_prio, cfqd->serving_type); + cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d", + cfqd->serving_wl_class, cfqd->serving_wl_type); cfqg_stats_update_avg_queue_size(cfqq->cfqg); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; @@ -2117,19 +2504,18 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) */ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) { - struct cfq_rb_root *service_tree = - service_tree_for(cfqd->serving_group, cfqd->serving_prio, - cfqd->serving_type); + struct cfq_rb_root *st = st_for(cfqd->serving_group, + cfqd->serving_wl_class, cfqd->serving_wl_type); if (!cfqd->rq_queued) return NULL; /* There is nothing to dispatch */ - if (!service_tree) + if (!st) return NULL; - if (RB_EMPTY_ROOT(&service_tree->rb)) + if (RB_EMPTY_ROOT(&st->rb)) return NULL; - return cfq_rb_first(service_tree); + return cfq_rb_first(st); } static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) @@ -2285,17 +2671,17 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - enum wl_prio_t prio = cfqq_prio(cfqq); - struct cfq_rb_root *service_tree = cfqq->service_tree; + enum wl_class_t wl_class = cfqq_class(cfqq); + struct cfq_rb_root *st = cfqq->service_tree; - BUG_ON(!service_tree); - BUG_ON(!service_tree->count); + BUG_ON(!st); + BUG_ON(!st->count); if (!cfqd->cfq_slice_idle) return false; /* We never do for idle class queues. */ - if (prio == IDLE_WORKLOAD) + if (wl_class == IDLE_WORKLOAD) return false; /* We do for queues that were marked with idle window flag. */ @@ -2307,11 +2693,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) * Otherwise, we do only if they are the last ones * in their service tree. */ - if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) && - !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false)) + if (st->count == 1 && cfq_cfqq_sync(cfqq) && + !cfq_io_thinktime_big(cfqd, &st->ttime, false)) return true; - cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", - service_tree->count); + cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count); return false; } @@ -2494,8 +2879,8 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) } } -static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, - struct cfq_group *cfqg, enum wl_prio_t prio) +static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, + struct cfq_group *cfqg, enum wl_class_t wl_class) { struct cfq_queue *queue; int i; @@ -2505,7 +2890,7 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, for (i = 0; i <= SYNC_WORKLOAD; ++i) { /* select the one with lowest rb_key */ - queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); + queue = cfq_rb_first(st_for(cfqg, wl_class, i)); if (queue && (!key_valid || time_before(queue->rb_key, lowest_key))) { lowest_key = queue->rb_key; @@ -2517,26 +2902,27 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, return cur_best; } -static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) +static void +choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) { unsigned slice; unsigned count; struct cfq_rb_root *st; unsigned group_slice; - enum wl_prio_t original_prio = cfqd->serving_prio; + enum wl_class_t original_class = cfqd->serving_wl_class; /* Choose next priority. RT > BE > IDLE */ if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) - cfqd->serving_prio = RT_WORKLOAD; + cfqd->serving_wl_class = RT_WORKLOAD; else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) - cfqd->serving_prio = BE_WORKLOAD; + cfqd->serving_wl_class = BE_WORKLOAD; else { - cfqd->serving_prio = IDLE_WORKLOAD; + cfqd->serving_wl_class = IDLE_WORKLOAD; cfqd->workload_expires = jiffies + 1; return; } - if (original_prio != cfqd->serving_prio) + if (original_class != cfqd->serving_wl_class) goto new_workload; /* @@ -2544,7 +2930,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload * expiration time */ - st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); + st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type); count = st->count; /* @@ -2555,9 +2941,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) new_workload: /* otherwise select new workload type */ - cfqd->serving_type = - cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); - st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); + cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg, + cfqd->serving_wl_class); + st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type); count = st->count; /* @@ -2568,10 +2954,11 @@ new_workload: group_slice = cfq_group_slice(cfqd, cfqg); slice = group_slice * count / - max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], - cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); + max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], + cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, + cfqg)); - if (cfqd->serving_type == ASYNC_WORKLOAD) { + if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { unsigned int tmp; /* @@ -2617,14 +3004,14 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd) cfqd->serving_group = cfqg; /* Restore the workload type data */ - if (cfqg->saved_workload_slice) { - cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; - cfqd->serving_type = cfqg->saved_workload; - cfqd->serving_prio = cfqg->saved_serving_prio; + if (cfqg->saved_wl_slice) { + cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; + cfqd->serving_wl_type = cfqg->saved_wl_type; + cfqd->serving_wl_class = cfqg->saved_wl_class; } else cfqd->workload_expires = jiffies - 1; - choose_service_tree(cfqd, cfqg); + choose_wl_class_and_type(cfqd, cfqg); } /* @@ -3206,6 +3593,8 @@ retry: spin_lock_irq(cfqd->queue->queue_lock); if (new_cfqq) goto retry; + else + return &cfqd->oom_cfqq; } else { cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask | __GFP_ZERO, @@ -3403,7 +3792,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, return true; /* Allow preemption only if we are idling on sync-noidle tree */ - if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && + if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD && cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && new_cfqq->service_tree->count == 2 && RB_EMPTY_ROOT(&cfqq->sort_list)) @@ -3455,7 +3844,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) * doesn't happen */ if (old_type != cfqq_type(cfqq)) - cfqq->cfqg->saved_workload_slice = 0; + cfqq->cfqg->saved_wl_slice = 0; /* * Put the new queue at the front of the of the current list, @@ -3637,16 +4026,17 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; if (sync) { - struct cfq_rb_root *service_tree; + struct cfq_rb_root *st; RQ_CIC(rq)->ttime.last_end_request = now; if (cfq_cfqq_on_rr(cfqq)) - service_tree = cfqq->service_tree; + st = cfqq->service_tree; else - service_tree = service_tree_for(cfqq->cfqg, - cfqq_prio(cfqq), cfqq_type(cfqq)); - service_tree->ttime.last_end_request = now; + st = st_for(cfqq->cfqg, cfqq_class(cfqq), + cfqq_type(cfqq)); + + st->ttime.last_end_request = now; if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) cfqd->last_delayed_sync = now; } @@ -3993,6 +4383,7 @@ static int cfq_init_queue(struct request_queue *q) cfq_init_cfqg_base(cfqd->root_group); #endif cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; + cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; /* * Not strictly needed (since RB_ROOT just clears the node and we @@ -4177,6 +4568,7 @@ static struct blkcg_policy blkcg_policy_cfq = { .cftypes = cfq_blkcg_files, .pd_init_fn = cfq_pd_init, + .pd_offline_fn = cfq_pd_offline, .pd_reset_stats_fn = cfq_pd_reset_stats, }; #endif diff --git a/block/elevator.c b/block/elevator.c index 603b2c178740..a0ffdd943c98 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -46,11 +46,6 @@ static LIST_HEAD(elv_list); /* * Merge hash stuff. */ -static const int elv_hash_shift = 6; -#define ELV_HASH_BLOCK(sec) ((sec) >> 3) -#define ELV_HASH_FN(sec) \ - (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) -#define ELV_HASH_ENTRIES (1 << elv_hash_shift) #define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) /* @@ -158,7 +153,6 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, struct elevator_type *e) { struct elevator_queue *eq; - int i; eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node); if (unlikely(!eq)) @@ -167,14 +161,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, eq->type = e; kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); - - eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!eq->hash) - goto err; - - for (i = 0; i < ELV_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&eq->hash[i]); + hash_init(eq->hash); return eq; err: @@ -189,7 +176,6 @@ static void elevator_release(struct kobject *kobj) e = container_of(kobj, struct elevator_queue, kobj); elevator_put(e->type); - kfree(e->hash); kfree(e); } @@ -261,7 +247,7 @@ EXPORT_SYMBOL(elevator_exit); static inline void __elv_rqhash_del(struct request *rq) { - hlist_del_init(&rq->hash); + hash_del(&rq->hash); } static void elv_rqhash_del(struct request_queue *q, struct request *rq) @@ -275,7 +261,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq) struct elevator_queue *e = q->elevator; BUG_ON(ELV_ON_HASH(rq)); - hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); + hash_add(e->hash, &rq->hash, rq_hash_key(rq)); } static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) @@ -287,11 +273,10 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) { struct elevator_queue *e = q->elevator; - struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; - struct hlist_node *entry, *next; + struct hlist_node *next; struct request *rq; - hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { + hash_for_each_possible_safe(e->hash, rq, next, hash, offset) { BUG_ON(!ELV_ON_HASH(rq)); if (unlikely(!rq_mergeable(rq))) { diff --git a/block/genhd.c b/block/genhd.c index 5f73c2435fde..3c001fba80c7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -26,7 +26,7 @@ static DEFINE_MUTEX(block_class_lock); struct kobject *block_depr; /* for extended dynamic devt allocation, currently only one major is used */ -#define MAX_EXT_DEVT (1 << MINORBITS) +#define NR_EXT_DEVT (1 << MINORBITS) /* For extended devt allocation. ext_devt_mutex prevents look up * results from going away underneath its user. @@ -411,7 +411,7 @@ static int blk_mangle_minor(int minor) int blk_alloc_devt(struct hd_struct *part, dev_t *devt) { struct gendisk *disk = part_to_disk(part); - int idx, rc; + int idx; /* in consecutive minor range? */ if (part->partno < disk->minors) { @@ -420,19 +420,11 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) } /* allocate ext devt */ - do { - if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) - return -ENOMEM; - rc = idr_get_new(&ext_devt_idr, part, &idx); - } while (rc == -EAGAIN); - - if (rc) - return rc; - - if (idx > MAX_EXT_DEVT) { - idr_remove(&ext_devt_idr, idx); - return -EBUSY; - } + mutex_lock(&ext_devt_mutex); + idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL); + mutex_unlock(&ext_devt_mutex); + if (idx < 0) + return idx == -ENOSPC ? -EBUSY : idx; *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); return 0; @@ -655,7 +647,6 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_exit(&piter); invalidate_partition(disk, 0); - blk_free_devt(disk_to_dev(disk)->devt); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; @@ -674,6 +665,7 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); + blk_free_devt(disk_to_dev(disk)->devt); } EXPORT_SYMBOL(del_gendisk); diff --git a/block/partition-generic.c b/block/partition-generic.c index f1d14519cc04..789cdea05893 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -249,11 +249,11 @@ void delete_partition(struct gendisk *disk, int partno) if (!part) return; - blk_free_devt(part_devt(part)); rcu_assign_pointer(ptbl->part[partno], NULL); rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); + blk_free_devt(part_devt(part)); hd_struct_put(part); } @@ -418,7 +418,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) int p, highest, res; rescan: if (state && !IS_ERR(state)) { - kfree(state); + free_partitions(state); state = NULL; } @@ -525,7 +525,7 @@ rescan: md_autodetect_dev(part_to_dev(part)->devt); #endif } - kfree(state); + free_partitions(state); return 0; } diff --git a/block/partitions/check.c b/block/partitions/check.c index bc908672c976..19ba207ea7d1 100644 --- a/block/partitions/check.c +++ b/block/partitions/check.c @@ -14,6 +14,7 @@ */ #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/ctype.h> #include <linux/genhd.h> @@ -106,18 +107,45 @@ static int (*check_part[])(struct parsed_partitions *) = { NULL }; +static struct parsed_partitions *allocate_partitions(struct gendisk *hd) +{ + struct parsed_partitions *state; + int nr; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + nr = disk_max_parts(hd); + state->parts = vzalloc(nr * sizeof(state->parts[0])); + if (!state->parts) { + kfree(state); + return NULL; + } + + state->limit = nr; + + return state; +} + +void free_partitions(struct parsed_partitions *state) +{ + vfree(state->parts); + kfree(state); +} + struct parsed_partitions * check_partition(struct gendisk *hd, struct block_device *bdev) { struct parsed_partitions *state; int i, res, err; - state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); + state = allocate_partitions(hd); if (!state) return NULL; state->pp_buf = (char *)__get_free_page(GFP_KERNEL); if (!state->pp_buf) { - kfree(state); + free_partitions(state); return NULL; } state->pp_buf[0] = '\0'; @@ -128,10 +156,9 @@ check_partition(struct gendisk *hd, struct block_device *bdev) if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); - state->limit = disk_max_parts(hd); i = res = err = 0; while (!res && check_part[i]) { - memset(&state->parts, 0, sizeof(state->parts)); + memset(state->parts, 0, state->limit * sizeof(state->parts[0])); res = check_part[i++](state); if (res < 0) { /* We have hit an I/O error which we don't report now. @@ -161,6 +188,6 @@ check_partition(struct gendisk *hd, struct block_device *bdev) printk(KERN_INFO "%s", state->pp_buf); free_page((unsigned long)state->pp_buf); - kfree(state); + free_partitions(state); return ERR_PTR(res); } diff --git a/block/partitions/check.h b/block/partitions/check.h index 52b100311ec3..eade17ea910b 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -15,13 +15,15 @@ struct parsed_partitions { int flags; bool has_info; struct partition_meta_info info; - } parts[DISK_MAX_PARTS]; + } *parts; int next; int limit; bool access_beyond_eod; char *pp_buf; }; +void free_partitions(struct parsed_partitions *state); + struct parsed_partitions * check_partition(struct gendisk *, struct block_device *); diff --git a/block/partitions/efi.c b/block/partitions/efi.c index b62fb88b8711..ff5804e2f1d2 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -310,15 +310,23 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, goto fail; } - /* Check the GUID Partition Table header size */ + /* Check the GUID Partition Table header size is too big */ if (le32_to_cpu((*gpt)->header_size) > bdev_logical_block_size(state->bdev)) { - pr_debug("GUID Partition Table Header size is wrong: %u > %u\n", + pr_debug("GUID Partition Table Header size is too large: %u > %u\n", le32_to_cpu((*gpt)->header_size), bdev_logical_block_size(state->bdev)); goto fail; } + /* Check the GUID Partition Table header size is too small */ + if (le32_to_cpu((*gpt)->header_size) < sizeof(gpt_header)) { + pr_debug("GUID Partition Table Header size is too small: %u < %zu\n", + le32_to_cpu((*gpt)->header_size), + sizeof(gpt_header)); + goto fail; + } + /* Check the GUID Partition Table CRC */ origcrc = le32_to_cpu((*gpt)->header_crc32); (*gpt)->header_crc32 = 0; diff --git a/block/partitions/mac.c b/block/partitions/mac.c index 11f688bd76c5..76d8ba6379a9 100644 --- a/block/partitions/mac.c +++ b/block/partitions/mac.c @@ -63,6 +63,10 @@ int mac_partition(struct parsed_partitions *state) put_dev_sector(sect); return 0; } + + if (blocks_in_map >= state->limit) + blocks_in_map = state->limit - 1; + strlcat(state->pp_buf, " [mac]", PAGE_SIZE); for (slot = 1; slot <= blocks_in_map; ++slot) { int pos = slot * secsize; diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 8752a5d26565..7681cd295ab8 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -455,14 +455,19 @@ int msdos_partition(struct parsed_partitions *state) data = read_part_sector(state, 0, §); if (!data) return -1; - if (!msdos_magic_present(data + 510)) { + + /* + * Note order! (some AIX disks, e.g. unbootable kind, + * have no MSDOS 55aa) + */ + if (aix_magic_present(state, data)) { put_dev_sector(sect); + strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); return 0; } - if (aix_magic_present(state, data)) { + if (!msdos_magic_present(data + 510)) { put_dev_sector(sect); - strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); return 0; } diff --git a/crypto/algapi.c b/crypto/algapi.c index 08c57c8aec95..6149a6e09643 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -447,7 +447,7 @@ EXPORT_SYMBOL_GPL(crypto_register_template); void crypto_unregister_template(struct crypto_template *tmpl) { struct crypto_instance *inst; - struct hlist_node *p, *n; + struct hlist_node *n; struct hlist_head *list; LIST_HEAD(users); @@ -457,7 +457,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) list_del_init(&tmpl->list); list = &tmpl->instances; - hlist_for_each_entry(inst, p, list, list) { + hlist_for_each_entry(inst, list, list) { int err = crypto_remove_alg(&inst->alg, &users); BUG_ON(err); } @@ -466,7 +466,7 @@ void crypto_unregister_template(struct crypto_template *tmpl) up_write(&crypto_alg_sem); - hlist_for_each_entry_safe(inst, p, n, list, list) { + hlist_for_each_entry_safe(inst, n, list, list) { BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1); tmpl->free(inst); } diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 320006019e68..92ed9692c47e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -266,7 +266,8 @@ config ACPI_CUSTOM_DSDT default ACPI_CUSTOM_DSDT_FILE != "" config ACPI_INITRD_TABLE_OVERRIDE - bool "ACPI tables can be passed via uncompressed cpio in initrd" + bool "ACPI tables override via initrd" + depends on BLK_DEV_INITRD && X86 default n help This option provides functionality to override arbitrary ACPI tables diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index b22d71cac54c..0e3f8f9dcd29 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -157,7 +157,6 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) { struct atm_cirange ci; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; int i; @@ -171,7 +170,7 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) for(i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev != dev) continue; @@ -264,12 +263,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) { struct hlist_head *head; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == dev && vcc->vci == vci && vcc->vpi == vpi && diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index c1eb6fa8ac35..b1955ba40d63 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2093,7 +2093,6 @@ static unsigned char eni_phy_get(struct atm_dev *dev,unsigned long addr) static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page) { - struct hlist_node *node; struct sock *s; static const char *signal[] = { "LOST","unknown","okay" }; struct eni_dev *eni_dev = ENI_DEV(dev); @@ -2171,7 +2170,7 @@ static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page) for(i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { struct eni_vcc *eni_vcc; int length; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 72b6960fa95f..d6891267f5bb 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -329,7 +329,6 @@ __find_vcc(struct he_dev *he_dev, unsigned cid) { struct hlist_head *head; struct atm_vcc *vcc; - struct hlist_node *node; struct sock *s; short vpi; int vci; @@ -338,7 +337,7 @@ __find_vcc(struct he_dev *he_dev, unsigned cid) vci = cid & ((1 << he_dev->vcibits) - 1); head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == he_dev->atm_dev && vcc->vci == vci && vcc->vpi == vpi && diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index ed1d2b7f923b..6587dc295eb0 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -251,7 +251,6 @@ static void nicstar_remove_one(struct pci_dev *pcidev) if (card->scd2vc[j] != NULL) free_scq(card, card->scd2vc[j]->scq, card->scd2vc[j]->tx_vcc); } - idr_remove_all(&card->idr); idr_destroy(&card->idr); pci_free_consistent(card->pcidev, NS_RSQSIZE + NS_RSQ_ALIGNMENT, card->rsq.org, card->rsq.dma); @@ -950,11 +949,10 @@ static void free_scq(ns_dev *card, scq_info *scq, struct atm_vcc *vcc) static void push_rxbufs(ns_dev * card, struct sk_buff *skb) { struct sk_buff *handle1, *handle2; - u32 id1 = 0, id2 = 0; + int id1, id2; u32 addr1, addr2; u32 stat; unsigned long flags; - int err; /* *BARF* */ handle2 = NULL; @@ -1027,23 +1025,12 @@ static void push_rxbufs(ns_dev * card, struct sk_buff *skb) card->lbfqc += 2; } - do { - if (!idr_pre_get(&card->idr, GFP_ATOMIC)) { - printk(KERN_ERR - "nicstar%d: no free memory for idr\n", - card->index); - goto out; - } - - if (!id1) - err = idr_get_new_above(&card->idr, handle1, 0, &id1); - - if (!id2 && err == 0) - err = idr_get_new_above(&card->idr, handle2, 0, &id2); - - } while (err == -EAGAIN); + id1 = idr_alloc(&card->idr, handle1, 0, 0, GFP_ATOMIC); + if (id1 < 0) + goto out; - if (err) + id2 = idr_alloc(&card->idr, handle2, 0, 0, GFP_ATOMIC); + if (id2 < 0) goto out; spin_lock_irqsave(&card->res_lock, flags); diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 0474a89170b9..32784d18d1f7 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -896,12 +896,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) { struct hlist_head *head; struct atm_vcc *vcc = NULL; - struct hlist_node *node; struct sock *s; read_lock(&vcc_sklist_lock); head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); if (vcc->dev == dev && vcc->vci == vci && vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE && diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 8f12dc78a848..5b5ee79ff236 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, else ErrorCode = 0; } + break; default: ErrorCode = -ENOTTY; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 824e09c4d0d7..5dc0daed8fac 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -63,19 +63,6 @@ config AMIGA_Z2RAM To compile this driver as a module, choose M here: the module will be called z2ram. -config BLK_DEV_XD - tristate "XT hard disk support" - depends on ISA && ISA_DMA_API - select CHECK_SIGNATURE - help - Very old 8 bit hard disk controllers used in the IBM XT computer - will be supported if you say Y here. - - To compile this driver as a module, choose M here: the - module will be called xd. - - It's pretty unlikely that you have one of these: say N. - config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST @@ -544,4 +531,14 @@ config BLK_DEV_RBD If unsure, say N. +config BLK_DEV_RSXX + tristate "RamSam PCIe Flash SSD Device Driver" + depends on PCI + help + Device driver for IBM's high speed PCIe SSD + storage devices: RamSan-70 and RamSan-80. + + To compile this driver as a module, choose M here: the + module will be called rsxx. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 17e82df3df74..a3b40232c6ab 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o @@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ + swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8c13eeb83c53..e98da675f0c1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2660,25 +2660,24 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; - if (!idr_pre_get(&minors, GFP_KERNEL)) - goto out_no_minor_idr; - if (idr_get_new_above(&minors, mdev, minor, &minor_got)) + minor_got = idr_alloc(&minors, mdev, minor, minor + 1, GFP_KERNEL); + if (minor_got < 0) { + if (minor_got == -ENOSPC) { + err = ERR_MINOR_EXISTS; + drbd_msg_put_info("requested minor exists already"); + } goto out_no_minor_idr; - if (minor_got != minor) { - err = ERR_MINOR_EXISTS; - drbd_msg_put_info("requested minor exists already"); - goto out_idr_remove_minor; } - if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) - goto out_idr_remove_minor; - if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got)) + vnr_got = idr_alloc(&tconn->volumes, mdev, vnr, vnr + 1, GFP_KERNEL); + if (vnr_got < 0) { + if (vnr_got == -ENOSPC) { + err = ERR_INVALID_REQUEST; + drbd_msg_put_info("requested volume exists already"); + } goto out_idr_remove_minor; - if (vnr_got != vnr) { - err = ERR_INVALID_REQUEST; - drbd_msg_put_info("requested volume exists already"); - goto out_idr_remove_vol; } + add_disk(disk); kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */ @@ -2689,8 +2688,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, return NO_ERROR; -out_idr_remove_vol: - idr_remove(&tconn->volumes, vnr_got); out_idr_remove_minor: idr_remove(&minors, minor_got); synchronize_rcu(); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8031a8cdd698..747bb2af69dc 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { - loff_t size, loopsize; + loff_t loopsize; /* Compute loopsize in bytes */ - size = i_size_read(file->f_mapping->host); - loopsize = size - offset; - /* offset is beyond i_size, wierd but possible */ + loopsize = i_size_read(file->f_mapping->host); + if (offset > 0) + loopsize -= offset; + /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; @@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; + struct block_device *bdev = lo->lo_device; if (unlikely((loff_t)x != size)) return -EFBIG; @@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; set_capacity(lo->lo_disk, x); + bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); + /* let user-space know about the new size */ + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); return 0; } @@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) return err; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { + lo->lo_sizelimit != info->lo_sizelimit) if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) return -EFBIG; - } + loop_config_discard(lo); memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); @@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) { - int err; - sector_t sec; - loff_t sz; - - err = -ENXIO; if (unlikely(lo->lo_state != Lo_bound)) - goto out; - err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); - if (unlikely(err)) - goto out; - sec = get_capacity(lo->lo_disk); - /* the width of sector_t may be narrow for bit-shift */ - sz = sec; - sz <<= 9; - mutex_lock(&bdev->bd_mutex); - bd_set_size(bdev, sz); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - mutex_unlock(&bdev->bd_mutex); + return -ENXIO; - out: - return err; + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int lo_ioctl(struct block_device *bdev, fmode_t mode, @@ -1624,30 +1611,17 @@ static int loop_add(struct loop_device **l, int i) if (!lo) goto out; - if (!idr_pre_get(&loop_index_idr, GFP_KERNEL)) - goto out_free_dev; - + /* allocate id, if @id >= 0, we're requesting that specific id */ if (i >= 0) { - int m; - - /* create specific i in the index */ - err = idr_get_new_above(&loop_index_idr, lo, i, &m); - if (err >= 0 && i != m) { - idr_remove(&loop_index_idr, m); + err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); + if (err == -ENOSPC) err = -EEXIST; - } - } else if (i == -1) { - int m; - - /* get next free nr */ - err = idr_get_new(&loop_index_idr, lo, &m); - if (err >= 0) - i = m; } else { - err = -EINVAL; + err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } if (err < 0) goto out_free_dev; + i = err; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) @@ -1858,11 +1832,15 @@ static int __init loop_init(void) max_part = (1UL << part_shift) - 1; } - if ((1UL << part_shift) > DISK_MAX_PARTS) - return -EINVAL; + if ((1UL << part_shift) > DISK_MAX_PARTS) { + err = -EINVAL; + goto misc_out; + } - if (max_loop > 1UL << (MINORBITS - part_shift)) - return -EINVAL; + if (max_loop > 1UL << (MINORBITS - part_shift)) { + err = -EINVAL; + goto misc_out; + } /* * If max_loop is specified, create that many devices upfront. @@ -1880,8 +1858,10 @@ static int __init loop_init(void) range = 1UL << MINORBITS; } - if (register_blkdev(LOOP_MAJOR, "loop")) - return -EIO; + if (register_blkdev(LOOP_MAJOR, "loop")) { + err = -EIO; + goto misc_out; + } blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1894,6 +1874,10 @@ static int __init loop_init(void) printk(KERN_INFO "loop: module loaded\n"); return 0; + +misc_out: + misc_deregister(&loop_misc); + return err; } static int loop_exit_cb(int id, void *ptr, void *data) @@ -1911,7 +1895,6 @@ static void __exit loop_exit(void) range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); - idr_remove_all(&loop_index_idr); idr_destroy(&loop_index_idr); blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index 0ba837fc62a8..1fca1f996b45 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on PCI + depends on PCI && GENERIC_HARDIRQS help This enables the block driver for Micron PCIe SSDs. diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3fd100990453..11cc9522cdd4 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -88,6 +88,8 @@ static int instance; static int mtip_major; static struct dentry *dfs_parent; +static u32 cpu_use[NR_CPUS]; + static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd) */ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) { - atomic_set(&port->commands[tag].active, 1); + int group = tag >> 5; - spin_lock(&port->cmd_issue_lock); + atomic_set(&port->commands[tag].active, 1); + /* guard SACT and CI registers */ + spin_lock(&port->cmd_issue_lock[group]); writel((1 << MTIP_TAG_BIT(tag)), port->s_active[MTIP_TAG_INDEX(tag)]); writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); - - spin_unlock(&port->cmd_issue_lock); + spin_unlock(&port->cmd_issue_lock[group]); /* Set the command's timeout value.*/ port->commands[tag].comp_time = jiffies + msecs_to_jiffies( @@ -964,56 +967,56 @@ handle_tfe_exit: /* * Handle a set device bits interrupt */ -static inline void mtip_process_sdbf(struct driver_data *dd) +static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, + u32 completed) { - struct mtip_port *port = dd->port; - int group, tag, bit; - u32 completed; + struct driver_data *dd = port->dd; + int tag, bit; struct mtip_cmd *command; - /* walk all bits in all slot groups */ - for (group = 0; group < dd->slot_groups; group++) { - completed = readl(port->completed[group]); - if (!completed) - continue; + if (!completed) { + WARN_ON_ONCE(!completed); + return; + } + /* clear completed status register in the hardware.*/ + writel(completed, port->completed[group]); - /* clear completed status register in the hardware.*/ - writel(completed, port->completed[group]); + /* Process completed commands. */ + for (bit = 0; (bit < 32) && completed; bit++) { + if (completed & 0x01) { + tag = (group << 5) | bit; - /* Process completed commands. */ - for (bit = 0; - (bit < 32) && completed; - bit++, completed >>= 1) { - if (completed & 0x01) { - tag = (group << 5) | bit; + /* skip internal command slot. */ + if (unlikely(tag == MTIP_TAG_INTERNAL)) + continue; - /* skip internal command slot. */ - if (unlikely(tag == MTIP_TAG_INTERNAL)) - continue; + command = &port->commands[tag]; + /* make internal callback */ + if (likely(command->comp_func)) { + command->comp_func( + port, + tag, + command->comp_data, + 0); + } else { + dev_warn(&dd->pdev->dev, + "Null completion " + "for tag %d", + tag); - command = &port->commands[tag]; - /* make internal callback */ - if (likely(command->comp_func)) { - command->comp_func( - port, - tag, - command->comp_data, - 0); - } else { - dev_warn(&dd->pdev->dev, - "Null completion " - "for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - mtip_command_cleanup(dd); - return; - } + if (mtip_check_surprise_removal( + dd->pdev)) { + mtip_command_cleanup(dd); + return; } } } + completed >>= 1; } + + /* If last, re-enable interrupts */ + if (atomic_dec_return(&dd->irq_workers_active) == 0) + writel(0xffffffff, dd->mmio + HOST_IRQ_STAT); } /* @@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) struct mtip_port *port = dd->port; u32 hba_stat, port_stat; int rv = IRQ_NONE; + int do_irq_enable = 1, i, workers; + struct mtip_work *twork; hba_stat = readl(dd->mmio + HOST_IRQ_STAT); if (hba_stat) { @@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ - if (likely(port_stat & PORT_IRQ_SDB_FIS)) - mtip_process_sdbf(dd); + if (likely(port_stat & PORT_IRQ_SDB_FIS)) { + do_irq_enable = 0; + WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0); + + /* Start at 1: group zero is always local? */ + for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS; + i++) { + twork = &dd->work[i]; + twork->completed = readl(port->completed[i]); + if (twork->completed) + workers++; + } + + atomic_set(&dd->irq_workers_active, workers); + if (workers) { + for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) { + twork = &dd->work[i]; + if (twork->completed) + queue_work_on( + twork->cpu_binding, + dd->isr_workq, + &twork->work); + } + + if (likely(dd->work[0].completed)) + mtip_workq_sdbfx(port, 0, + dd->work[0].completed); + + } else { + /* + * Chip quirk: SDB interrupt but nothing + * to complete + */ + do_irq_enable = 1; + } + } if (unlikely(port_stat & PORT_IRQ_ERR)) { if (unlikely(mtip_check_surprise_removal(dd->pdev))) { @@ -1103,21 +1142,13 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) } /* acknowledge interrupt */ - writel(hba_stat, dd->mmio + HOST_IRQ_STAT); + if (unlikely(do_irq_enable)) + writel(hba_stat, dd->mmio + HOST_IRQ_STAT); return rv; } /* - * Wrapper for mtip_handle_irq - * (ignores return code) - */ -static void mtip_tasklet(unsigned long data) -{ - mtip_handle_irq((struct driver_data *) data); -} - -/* * HBA interrupt subroutine. * * @irq IRQ number. @@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data) static irqreturn_t mtip_irq_handler(int irq, void *instance) { struct driver_data *dd = instance; - tasklet_schedule(&dd->tasklet); - return IRQ_HANDLED; + + return mtip_handle_irq(dd); } static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) @@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) } #endif + /* Demux ID.DRAT & ID.RZAT to determine trim support */ + if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) + port->dd->trim_supp = true; + else + port->dd->trim_supp = false; + /* Set the identify buffer as valid. */ port->identify_valid = 1; @@ -1676,6 +1713,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, } /* + * Trim unused sectors + * + * @dd pointer to driver_data structure + * @lba starting lba + * @len # of 512b sectors to trim + * + * return value + * -ENOMEM Out of dma memory + * -EINVAL Invalid parameters passed in, trim not supported + * -EIO Error submitting trim request to hw + */ +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +{ + int i, rv = 0; + u64 tlba, tlen, sect_left; + struct mtip_trim_entry *buf; + dma_addr_t dma_addr; + struct host_to_dev_fis fis; + + if (!len || dd->trim_supp == false) + return -EINVAL; + + /* Trim request too big */ + WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); + + /* Trim request not aligned on 4k boundary */ + WARN_ON(len % 8 != 0); + + /* Warn if vu_trim structure is too big */ + WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE); + + /* Allocate a DMA buffer for the trim structure */ + buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf, 0, ATA_SECT_SIZE); + + for (i = 0, sect_left = len, tlba = lba; + i < MTIP_MAX_TRIM_ENTRIES && sect_left; + i++) { + tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? + MTIP_MAX_TRIM_ENTRY_LEN : + sect_left); + buf[i].lba = __force_bit2int cpu_to_le32(tlba); + buf[i].range = __force_bit2int cpu_to_le16(tlen); + tlba += tlen; + sect_left -= tlen; + } + WARN_ON(sect_left != 0); + + /* Build the fis */ + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = 0xfb; + fis.features = 0x60; + fis.sect_count = 1; + fis.device = ATA_DEVICE_OBS; + + if (mtip_exec_internal_command(dd->port, + &fis, + 5, + dma_addr, + ATA_SECT_SIZE, + 0, + GFP_KERNEL, + MTIP_TRIM_TIMEOUT_MS) < 0) + rv = -EIO; + + dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); + return rv; +} + +/* * Get the drive capacity. * * @dd Pointer to the device data structure. @@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd) hba_setup(dd); - tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); - - dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); + dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, + dd->numa_node); if (!dd->port) { dev_err(&dd->pdev->dev, "Memory allocation: port structure\n"); return -ENOMEM; } + /* Continue workqueue setup */ + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + dd->work[i].port = dd->port; + /* Counting semaphore to track command slot usage */ sema_init(&dd->port->cmd_slot, num_command_slots - 1); /* Spinlock to prevent concurrent issue */ - spin_lock_init(&dd->port->cmd_issue_lock); + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + spin_lock_init(&dd->port->cmd_issue_lock[i]); /* Set the port mmio base address. */ dd->port->mmio = dd->mmio + PORT_OFFSET; @@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd) "Unable to allocate IRQ %d\n", dd->pdev->irq); goto out2; } + irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding)); /* Enable interrupts on the HBA. */ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, @@ -3241,7 +3358,8 @@ out3: writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, dd->mmio + HOST_CTL); - /*Release the IRQ. */ + /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); out2: @@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd) del_timer_sync(&dd->port->cmd_timer); /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); - /* Stop the bottom half tasklet. */ - tasklet_kill(&dd->tasklet); - /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), @@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) } } + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, + bio_sectors(bio))); + return; + } + if (unlikely(!bio_has_data(bio))) { blk_queue_flush(queue, 0); bio_endio(bio, 0); @@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk(MTIP_MAX_MINORS); + dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); if (dd->disk == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate gendisk structure\n"); @@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd) skip_create_disk: /* Allocate the request queue. */ - dd->queue = blk_alloc_queue(GFP_KERNEL); + dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); if (dd->queue == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3783,6 +3905,15 @@ skip_create_disk: */ blk_queue_flush(dd->queue, 0); + /* Signal trim support */ + if (dd->trim_supp == true) { + set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + dd->queue->limits.discard_granularity = 4096; + blk_queue_max_discard_sectors(dd->queue, + MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); + dd->queue->limits.discard_zeroes_data = 0; + } + /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { dev_warn(&dd->pdev->dev, @@ -3813,9 +3944,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); - - dd->mtip_svc_handler = kthread_run(mtip_service_thread, - dd, thd_name); + dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, + dd, dd->numa_node, thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); @@ -3823,7 +3953,7 @@ start_service_thread: rv = -EFAULT; goto kthread_run_error; } - + wake_up_process(dd->mtip_svc_handler); if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) rv = wait_for_rebuild; @@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd) return 0; } +static void drop_cpu(int cpu) +{ + cpu_use[cpu]--; +} + +static int get_least_used_cpu_on_node(int node) +{ + int cpu, least_used_cpu, least_cnt; + const struct cpumask *node_mask; + + node_mask = cpumask_of_node(node); + least_used_cpu = cpumask_first(node_mask); + least_cnt = cpu_use[least_used_cpu]; + cpu = least_used_cpu; + + for_each_cpu(cpu, node_mask) { + if (cpu_use[cpu] < least_cnt) { + least_used_cpu = cpu; + least_cnt = cpu_use[cpu]; + } + } + cpu_use[least_used_cpu]++; + return least_used_cpu; +} + +/* Helper for selecting a node in round robin mode */ +static inline int mtip_get_next_rr_node(void) +{ + static int next_node = -1; + + if (next_node == -1) { + next_node = first_online_node; + return next_node; + } + + next_node = next_online_node(next_node); + if (next_node == MAX_NUMNODES) + next_node = first_online_node; + return next_node; +} + +static DEFINE_HANDLER(0); +static DEFINE_HANDLER(1); +static DEFINE_HANDLER(2); +static DEFINE_HANDLER(3); +static DEFINE_HANDLER(4); +static DEFINE_HANDLER(5); +static DEFINE_HANDLER(6); +static DEFINE_HANDLER(7); + /* * Called for each supported PCI device detected. * @@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev, { int rv = 0; struct driver_data *dd = NULL; + char cpu_list[256]; + const struct cpumask *node_mask; + int cpu, i = 0, j = 0; + int my_node = NUMA_NO_NODE; /* Allocate memory for this devices private data. */ - dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); + my_node = pcibus_to_node(pdev->bus); + if (my_node != NUMA_NO_NODE) { + if (!node_online(my_node)) + my_node = mtip_get_next_rr_node(); + } else { + dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n"); + my_node = mtip_get_next_rr_node(); + } + dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", + my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), + cpu_to_node(smp_processor_id()), smp_processor_id()); + + dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { dev_err(&pdev->dev, "Unable to allocate memory for driver data\n"); @@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev, } } - pci_set_master(pdev); + /* Copy the info we may need later into the private data structure. */ + dd->major = mtip_major; + dd->instance = instance; + dd->pdev = pdev; + dd->numa_node = my_node; + memset(dd->workq_name, 0, 32); + snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); + + dd->isr_workq = create_workqueue(dd->workq_name); + if (!dd->isr_workq) { + dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + goto block_initialize_err; + } + + memset(cpu_list, 0, sizeof(cpu_list)); + + node_mask = cpumask_of_node(dd->numa_node); + if (!cpumask_empty(node_mask)) { + for_each_cpu(cpu, node_mask) + { + snprintf(&cpu_list[j], 256 - j, "%d ", cpu); + j = strlen(cpu_list); + } + + dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n", + dd->numa_node, + topology_physical_package_id(cpumask_first(node_mask)), + nr_cpus_node(dd->numa_node), + cpu_list); + } else + dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n"); + + dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node); + dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n", + cpu_to_node(dd->isr_binding), dd->isr_binding); + + /* first worker context always runs in ISR */ + dd->work[0].cpu_binding = dd->isr_binding; + dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[3].cpu_binding = dd->work[0].cpu_binding; + dd->work[4].cpu_binding = dd->work[1].cpu_binding; + dd->work[5].cpu_binding = dd->work[2].cpu_binding; + dd->work[6].cpu_binding = dd->work[2].cpu_binding; + dd->work[7].cpu_binding = dd->work[1].cpu_binding; + + /* Log the bindings */ + for_each_present_cpu(cpu) { + memset(cpu_list, 0, sizeof(cpu_list)); + for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) { + if (dd->work[i].cpu_binding == cpu) { + snprintf(&cpu_list[j], 256 - j, "%d ", i); + j = strlen(cpu_list); + } + } + if (j) + dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list); + } + + INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0); + INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1); + INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2); + INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3); + INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4); + INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5); + INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6); + INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); + + pci_set_master(pdev); if (pci_enable_msi(pdev)) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; } - /* Copy the info we may need later into the private data structure. */ - dd->major = mtip_major; - dd->instance = instance; - dd->pdev = pdev; - /* Initialize the block layer. */ rv = mtip_block_initialize(dd); if (rv < 0) { @@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev, block_initialize_err: pci_disable_msi(pdev); - + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } setmask_err: pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); @@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev) /* Clean up the block layer. */ mtip_block_remove(dd); + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } + pci_disable_msi(pdev); kfree(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b1742640556a..3bffff5f670c 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -164,6 +164,35 @@ struct smart_attr { u8 res[3]; } __packed; +struct mtip_work { + struct work_struct work; + void *port; + int cpu_binding; + u32 completed; +} ____cacheline_aligned_in_smp; + +#define DEFINE_HANDLER(group) \ + void mtip_workq_sdbf##group(struct work_struct *work) \ + { \ + struct mtip_work *w = (struct mtip_work *) work; \ + mtip_workq_sdbfx(w->port, group, w->completed); \ + } + +#define MTIP_TRIM_TIMEOUT_MS 240000 +#define MTIP_MAX_TRIM_ENTRIES 8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 + +struct mtip_trim_entry { + u32 lba; /* starting lba of region */ + u16 rsvd; /* unused */ + u16 range; /* # of 512b blocks to trim */ +} __packed; + +struct mtip_trim { + /* Array of regions to trim */ + struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES]; +} __packed; + /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { /* @@ -424,7 +453,7 @@ struct mtip_port { */ struct semaphore cmd_slot; /* Spinlock for working around command-issue bug. */ - spinlock_t cmd_issue_lock; + spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS]; }; /* @@ -447,9 +476,6 @@ struct driver_data { struct mtip_port *port; /* Pointer to the port data structure. */ - /* Tasklet used to process the bottom half of the ISR. */ - struct tasklet_struct tasklet; - unsigned product_type; /* magic value declaring the product type */ unsigned slot_groups; /* number of slot groups the product supports */ @@ -461,6 +487,20 @@ struct driver_data { struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ struct dentry *dfs_node; + + bool trim_supp; /* flag indicating trim support */ + + int numa_node; /* NUMA support */ + + char workq_name[32]; + + struct workqueue_struct *isr_workq; + + struct mtip_work work[MTIP_MAX_SLOT_GROUPS]; + + atomic_t irq_workers_active; + + int isr_binding; }; #endif diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ade146bf65e5..7fecc784be01 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -98,6 +98,7 @@ static const char *nbdcmd_to_ascii(int cmd) case NBD_CMD_READ: return "read"; case NBD_CMD_WRITE: return "write"; case NBD_CMD_DISC: return "disconnect"; + case NBD_CMD_FLUSH: return "flush"; case NBD_CMD_TRIM: return "trim/discard"; } return "invalid"; @@ -244,8 +245,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(nbd_cmd(req)); - request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); - request.len = htonl(size); + + if (nbd_cmd(req) == NBD_CMD_FLUSH) { + /* Other values are reserved for FLUSH requests. */ + request.from = 0; + request.len = 0; + } else { + request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); + request.len = htonl(size); + } memcpy(request.handle, &req, sizeof(req)); dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", @@ -482,6 +490,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) } } + if (req->cmd_flags & REQ_FLUSH) { + BUG_ON(unlikely(blk_rq_sectors(req))); + nbd_cmd(req) = NBD_CMD_FLUSH; + } + req->errors = 0; mutex_lock(&nbd->tx_lock); @@ -551,6 +564,7 @@ static int nbd_thread(void *data) */ static void do_nbd_request(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct request *req; @@ -595,12 +609,20 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, struct request sreq; dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); + if (!nbd->sock) + return -EINVAL; + mutex_unlock(&nbd->tx_lock); + fsync_bdev(bdev); + mutex_lock(&nbd->tx_lock); blk_rq_init(NULL, &sreq); sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; + + /* Check again after getting mutex back. */ if (!nbd->sock) return -EINVAL; + nbd_send_req(nbd, &sreq); return 0; } @@ -614,6 +636,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_clear_que(nbd); BUG_ON(!list_empty(&nbd->queue_head)); BUG_ON(!list_empty(&nbd->waiting_queue)); + kill_bdev(bdev); if (file) fput(file); return 0; @@ -681,9 +704,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, mutex_unlock(&nbd->tx_lock); + if (nbd->flags & NBD_FLAG_READ_ONLY) + set_device_ro(bdev, true); if (nbd->flags & NBD_FLAG_SEND_TRIM) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + if (nbd->flags & NBD_FLAG_SEND_FLUSH) + blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + else + blk_queue_flush(nbd->disk->queue, 0); thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name); if (IS_ERR(thread)) { @@ -702,9 +731,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd->file = NULL; nbd_clear_que(nbd); dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); + kill_bdev(bdev); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + set_device_ro(bdev, false); if (file) fput(file); + nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0b3f2e..6c81a4c040b9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -52,9 +52,12 @@ #define SECTOR_SHIFT 9 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) -/* It might be useful to have this defined elsewhere too */ +/* It might be useful to have these defined elsewhere */ -#define U64_MAX ((u64) (~0ULL)) +#define U8_MAX ((u8) (~0U)) +#define U16_MAX ((u16) (~0U)) +#define U32_MAX ((u32) (~0U)) +#define U64_MAX ((u64) (~0ULL)) #define RBD_DRV_NAME "rbd" #define RBD_DRV_NAME_LONG "rbd (rados block device)" @@ -66,7 +69,6 @@ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ -#define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" @@ -93,8 +95,6 @@ #define DEV_NAME_LEN 32 #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) -#define RBD_READ_ONLY_DEFAULT false - /* * block device image metadata (in-memory version) */ @@ -119,16 +119,33 @@ struct rbd_image_header { * An rbd image specification. * * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely - * identify an image. + * identify an image. Each rbd_dev structure includes a pointer to + * an rbd_spec structure that encapsulates this identity. + * + * Each of the id's in an rbd_spec has an associated name. For a + * user-mapped image, the names are supplied and the id's associated + * with them are looked up. For a layered image, a parent image is + * defined by the tuple, and the names are looked up. + * + * An rbd_dev structure contains a parent_spec pointer which is + * non-null if the image it represents is a child in a layered + * image. This pointer will refer to the rbd_spec structure used + * by the parent rbd_dev for its own identity (i.e., the structure + * is shared between the parent and child). + * + * Since these structures are populated once, during the discovery + * phase of image construction, they are effectively immutable so + * we make no effort to synchronize access to them. + * + * Note that code herein does not assume the image name is known (it + * could be a null pointer). */ struct rbd_spec { u64 pool_id; char *pool_name; char *image_id; - size_t image_id_len; char *image_name; - size_t image_name_len; u64 snap_id; char *snap_name; @@ -136,10 +153,6 @@ struct rbd_spec { struct kref kref; }; -struct rbd_options { - bool read_only; -}; - /* * an instance of the client. multiple devices may share an rbd client. */ @@ -149,37 +162,76 @@ struct rbd_client { struct list_head node; }; -/* - * a request completion status - */ -struct rbd_req_status { - int done; - int rc; - u64 bytes; +struct rbd_img_request; +typedef void (*rbd_img_callback_t)(struct rbd_img_request *); + +#define BAD_WHICH U32_MAX /* Good which or bad which, which? */ + +struct rbd_obj_request; +typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); + +enum obj_request_type { + OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES }; -/* - * a collection of requests - */ -struct rbd_req_coll { - int total; - int num_done; +struct rbd_obj_request { + const char *object_name; + u64 offset; /* object start byte */ + u64 length; /* bytes from offset */ + + struct rbd_img_request *img_request; + struct list_head links; /* img_request->obj_requests */ + u32 which; /* posn image request list */ + + enum obj_request_type type; + union { + struct bio *bio_list; + struct { + struct page **pages; + u32 page_count; + }; + }; + + struct ceph_osd_request *osd_req; + + u64 xferred; /* bytes transferred */ + u64 version; + int result; + atomic_t done; + + rbd_obj_callback_t callback; + struct completion completion; + struct kref kref; - struct rbd_req_status status[0]; }; -/* - * a single io request - */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; +struct rbd_img_request { + struct request *rq; + struct rbd_device *rbd_dev; + u64 offset; /* starting image byte offset */ + u64 length; /* byte count from offset */ + bool write_request; /* false for read */ + union { + struct ceph_snap_context *snapc; /* for writes */ + u64 snap_id; /* for reads */ + }; + spinlock_t completion_lock;/* protects next_completion */ + u32 next_completion; + rbd_img_callback_t callback; + + u32 obj_request_count; + struct list_head obj_requests; /* rbd_obj_request structs */ + + struct kref kref; }; +#define for_each_obj_request(ireq, oreq) \ + list_for_each_entry(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_from(ireq, oreq) \ + list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_safe(ireq, oreq, n) \ + list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) + struct rbd_snap { struct device dev; const char *name; @@ -209,16 +261,18 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - spinlock_t lock; /* queue lock */ + spinlock_t lock; /* queue, flags, open_count */ struct rbd_image_header header; - bool exists; + unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; char *header_name; + struct ceph_file_layout layout; + struct ceph_osd_event *watch_event; - struct ceph_osd_request *watch_request; + struct rbd_obj_request *watch_request; struct rbd_spec *parent_spec; u64 parent_overlap; @@ -235,7 +289,19 @@ struct rbd_device { /* sysfs related */ struct device dev; - unsigned long open_count; + unsigned long open_count; /* protected by lock */ +}; + +/* + * Flag bits for rbd_dev->flags. If atomicity is required, + * rbd_dev->lock is used to protect access. + * + * Currently, only the "removing" flag (which is coupled with the + * "open_count" field) requires atomic access. + */ +enum rbd_dev_flags { + RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ + RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -277,6 +343,33 @@ static struct device rbd_root_dev = { .release = rbd_root_dev_release, }; +static __printf(2, 3) +void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (!rbd_dev) + printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); + else if (rbd_dev->disk) + printk(KERN_WARNING "%s: %s: %pV\n", + RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_name) + printk(KERN_WARNING "%s: image %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_id) + printk(KERN_WARNING "%s: id %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); + else /* punt */ + printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", + RBD_DRV_NAME, rbd_dev, &vaf); + va_end(args); +} + #ifdef RBD_DEBUG #define rbd_assert(expr) \ if (unlikely(!(expr))) { \ @@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + bool removing = false; if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; + spin_lock_irq(&rbd_dev->lock); + if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) + removing = true; + else + rbd_dev->open_count++; + spin_unlock_irq(&rbd_dev->lock); + if (removing) + return -ENOENT; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); - rbd_dev->open_count++; mutex_unlock(&ctl_mutex); return 0; @@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; + unsigned long open_count_before; + + spin_lock_irq(&rbd_dev->lock); + open_count_before = rbd_dev->open_count--; + spin_unlock_irq(&rbd_dev->lock); + rbd_assert(open_count_before > 0); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbd_assert(rbd_dev->open_count > 0); - rbd_dev->open_count--; put_device(&rbd_dev->dev); mutex_unlock(&ctl_mutex); @@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) struct rbd_client *rbdc; int ret = -ENOMEM; - dout("rbd_client_create\n"); + dout("%s:\n", __func__); rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); if (!rbdc) goto out_opt; @@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) spin_unlock(&rbd_client_list_lock); mutex_unlock(&ctl_mutex); + dout("%s: rbdc %p\n", __func__, rbdc); - dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: @@ -373,6 +479,8 @@ out_mutex: out_opt: if (ceph_opts) ceph_destroy_options(ceph_opts); + dout("%s: error %d\n", __func__, ret); + return ERR_PTR(ret); } @@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = { {-1, NULL} }; +struct rbd_options { + bool read_only; +}; + +#define RBD_READ_ONLY_DEFAULT false + static int parse_rbd_opts_token(char *c, void *private) { struct rbd_options *rbd_opts = private; @@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); - dout("rbd_release_client %p\n", rbdc); + dout("%s: rbdc %p\n", __func__, rbdc); spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); spin_unlock(&rbd_client_list_lock); @@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -/* - * Destroy requests collection - */ -static void rbd_coll_release(struct kref *kref) -{ - struct rbd_req_coll *coll = - container_of(kref, struct rbd_req_coll, kref); - - dout("rbd_coll_release %p\n", coll); - kfree(coll); -} - static bool rbd_image_format_valid(u32 image_format) { return image_format == 1 || image_format == 2; @@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) goto done; rbd_dev->mapping.read_only = true; } - rbd_dev->exists = true; + set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + done: return ret; } @@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header) header->snapc = NULL; } -static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) +static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { char *name; u64 segment; @@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, return length; } -static int rbd_get_num_segments(struct rbd_image_header *header, - u64 ofs, u64 len) -{ - u64 start_seg; - u64 end_seg; - - if (!len) - return 0; - if (len - 1 > U64_MAX - ofs) - return -ERANGE; - - start_seg = ofs >> header->obj_order; - end_seg = (ofs + len - 1) >> header->obj_order; - - return end_seg - start_seg + 1; -} - /* * returns the size of an object in the image */ @@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, unsigned int bi_size; struct bio *bio; - if (!bi) + if (!bi) { + rbd_warn(NULL, "bio_chain exhausted with %u left", len); goto out_err; /* EINVAL; ran out of bio's */ + } bi_size = min_t(unsigned int, bi->bi_size - off, len); bio = bio_clone_range(bi, off, bi_size, gfpmask); if (!bio) @@ -976,399 +1064,721 @@ out_err: return NULL; } -/* - * helpers for osd request op vectors. - */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, - int opcode, u32 payload_len) +static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { - struct ceph_osd_req_op *ops; + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_get(&obj_request->kref); +} + +static void rbd_obj_request_destroy(struct kref *kref); +static void rbd_obj_request_put(struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request != NULL); + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_put(&obj_request->kref, rbd_obj_request_destroy); +} + +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + +static void rbd_img_request_destroy(struct kref *kref); +static void rbd_img_request_put(struct rbd_img_request *img_request) +{ + rbd_assert(img_request != NULL); + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_put(&img_request->kref, rbd_img_request_destroy); +} + +static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->img_request == NULL); + + rbd_obj_request_get(obj_request); + obj_request->img_request = img_request; + obj_request->which = img_request->obj_request_count; + rbd_assert(obj_request->which != BAD_WHICH); + img_request->obj_request_count++; + list_add_tail(&obj_request->links, &img_request->obj_requests); + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); +} - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); - if (!ops) +static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->which != BAD_WHICH); + + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); + list_del(&obj_request->links); + rbd_assert(img_request->obj_request_count > 0); + img_request->obj_request_count--; + rbd_assert(obj_request->which == img_request->obj_request_count); + obj_request->which = BAD_WHICH; + rbd_assert(obj_request->img_request == img_request); + obj_request->img_request = NULL; + obj_request->callback = NULL; + rbd_obj_request_put(obj_request); +} + +static bool obj_request_type_valid(enum obj_request_type type) +{ + switch (type) { + case OBJ_REQUEST_NODATA: + case OBJ_REQUEST_BIO: + case OBJ_REQUEST_PAGES: + return true; + default: + return false; + } +} + +static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) +{ + struct ceph_osd_req_op *op; + va_list args; + size_t size; + + op = kzalloc(sizeof (*op), GFP_NOIO); + if (!op) return NULL; + op->op = opcode; + va_start(args, opcode); + switch (opcode) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + /* rbd_osd_req_op_create(READ, offset, length) */ + /* rbd_osd_req_op_create(WRITE, offset, length) */ + op->extent.offset = va_arg(args, u64); + op->extent.length = va_arg(args, u64); + if (opcode == CEPH_OSD_OP_WRITE) + op->payload_len = op->extent.length; + break; + case CEPH_OSD_OP_STAT: + break; + case CEPH_OSD_OP_CALL: + /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ + op->cls.class_name = va_arg(args, char *); + size = strlen(op->cls.class_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.class_len = size; + op->payload_len = size; + + op->cls.method_name = va_arg(args, char *); + size = strlen(op->cls.method_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.method_len = size; + op->payload_len += size; + + op->cls.argc = 0; + op->cls.indata = va_arg(args, void *); + size = va_arg(args, size_t); + rbd_assert(size <= (size_t) U32_MAX); + op->cls.indata_len = (u32) size; + op->payload_len += size; + break; + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ + /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ + op->watch.cookie = va_arg(args, u64); + op->watch.ver = va_arg(args, u64); + op->watch.ver = cpu_to_le64(op->watch.ver); + if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) + op->watch.flag = (u8) 1; + break; + default: + rbd_warn(NULL, "unsupported opcode %hu\n", opcode); + kfree(op); + op = NULL; + break; + } + va_end(args); - ops[0].op = opcode; + return op; +} - /* - * op extent offset and length will be set later on - * in calc_raw_layout() - */ - ops[0].payload_len = payload_len; +static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) +{ + kfree(op); +} + +static int rbd_obj_request_submit(struct ceph_osd_client *osdc, + struct rbd_obj_request *obj_request) +{ + dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); - return ops; + return ceph_osdc_start_request(osdc, obj_request->osd_req, false); } -static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +static void rbd_img_request_complete(struct rbd_img_request *img_request) { - kfree(ops); + dout("%s: img %p\n", __func__, img_request); + if (img_request->callback) + img_request->callback(img_request); + else + rbd_img_request_put(img_request); } -static void rbd_coll_end_req_index(struct request *rq, - struct rbd_req_coll *coll, - int index, - int ret, u64 len) +/* Caller is responsible for rbd_obj_request_destroy(obj_request) */ + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) { - struct request_queue *q; - int min, max, i; + dout("%s: obj %p\n", __func__, obj_request); - dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", - coll, index, ret, (unsigned long long) len); + return wait_for_completion_interruptible(&obj_request->completion); +} - if (!rq) - return; +static void obj_request_done_init(struct rbd_obj_request *obj_request) +{ + atomic_set(&obj_request->done, 0); + smp_wmb(); +} - if (!coll) { - blk_end_request(rq, ret, len); - return; +static void obj_request_done_set(struct rbd_obj_request *obj_request) +{ + int done; + + done = atomic_inc_return(&obj_request->done); + if (done > 1) { + struct rbd_img_request *img_request = obj_request->img_request; + struct rbd_device *rbd_dev; + + rbd_dev = img_request ? img_request->rbd_dev : NULL; + rbd_warn(rbd_dev, "obj_request %p was already done\n", + obj_request); } +} - q = rq->q; - - spin_lock_irq(q->queue_lock); - coll->status[index].done = 1; - coll->status[index].rc = ret; - coll->status[index].bytes = len; - max = min = coll->num_done; - while (max < coll->total && coll->status[max].done) - max++; - - for (i = min; i<max; i++) { - __blk_end_request(rq, coll->status[i].rc, - coll->status[i].bytes); - coll->num_done++; - kref_put(&coll->kref, rbd_coll_release); +static bool obj_request_done_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return atomic_read(&obj_request->done) != 0; +} + +static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p cb %p\n", __func__, obj_request, + obj_request->callback); + if (obj_request->callback) + obj_request->callback(obj_request); + else + complete_all(&obj_request->completion); +} + +static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} + +static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, + obj_request->result, obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the object. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; } - spin_unlock_irq(q->queue_lock); + obj_request_done_set(obj_request); } -static void rbd_coll_end_req(struct rbd_request *req, - int ret, u64 len) +static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) { - rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); + dout("%s: obj %p result %d %llu\n", __func__, obj_request, + obj_request->result, obj_request->length); + /* + * There is no such thing as a successful short write. + * Our xferred value is the number of bytes transferred + * back. Set it to our originally-requested length. + */ + obj_request->xferred = obj_request->length; + obj_request_done_set(obj_request); } /* - * Send ceph osd request + * For a simple stat call there's nothing to do. We'll do more if + * this is part of a write sequence for a layered image. */ -static int rbd_do_request(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *object_name, u64 ofs, u64 len, - struct bio *bio, - struct page **pages, - int num_pages, - int flags, - struct ceph_osd_req_op *ops, - struct rbd_req_coll *coll, - int coll_index, - void (*rbd_cb)(struct ceph_osd_request *req, - struct ceph_msg *msg), - struct ceph_osd_request **linger_req, - u64 *ver) -{ - struct ceph_osd_request *req; - struct ceph_file_layout *layout; - int ret; - u64 bno; - struct timespec mtime = CURRENT_TIME; - struct rbd_request *req_data; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_client *osdc; +static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} - req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) { - if (coll) - rbd_coll_end_req_index(rq, coll, coll_index, - -ENOMEM, len); - return -ENOMEM; +static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, + struct ceph_msg *msg) +{ + struct rbd_obj_request *obj_request = osd_req->r_priv; + u16 opcode; + + dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); + rbd_assert(osd_req == obj_request->osd_req); + rbd_assert(!!obj_request->img_request ^ + (obj_request->which == BAD_WHICH)); + + if (osd_req->r_result < 0) + obj_request->result = osd_req->r_result; + obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); + + WARN_ON(osd_req->r_num_ops != 1); /* For now */ + + /* + * We support a 64-bit length, but ultimately it has to be + * passed to blk_end_request(), which takes an unsigned int. + */ + obj_request->xferred = osd_req->r_reply_op_len[0]; + rbd_assert(obj_request->xferred < (u64) UINT_MAX); + opcode = osd_req->r_request_ops[0].op; + switch (opcode) { + case CEPH_OSD_OP_READ: + rbd_osd_read_callback(obj_request); + break; + case CEPH_OSD_OP_WRITE: + rbd_osd_write_callback(obj_request); + break; + case CEPH_OSD_OP_STAT: + rbd_osd_stat_callback(obj_request); + break; + case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + rbd_osd_trivial_callback(obj_request); + break; + default: + rbd_warn(NULL, "%s: unsupported op %hu\n", + obj_request->object_name, (unsigned short) opcode); + break; } - if (coll) { - req_data->coll = coll; - req_data->coll_index = coll_index; + if (obj_request_done_test(obj_request)) + rbd_obj_request_complete(obj_request); +} + +static struct ceph_osd_request *rbd_osd_req_create( + struct rbd_device *rbd_dev, + bool write_request, + struct rbd_obj_request *obj_request, + struct ceph_osd_req_op *op) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_snap_context *snapc = NULL; + struct ceph_osd_client *osdc; + struct ceph_osd_request *osd_req; + struct timespec now; + struct timespec *mtime; + u64 snap_id = CEPH_NOSNAP; + u64 offset = obj_request->offset; + u64 length = obj_request->length; + + if (img_request) { + rbd_assert(img_request->write_request == write_request); + if (img_request->write_request) + snapc = img_request->snapc; + else + snap_id = img_request->snap_id; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", - object_name, (unsigned long long) ofs, - (unsigned long long) len, coll, coll_index); + /* Allocate and initialize the request, for the single op */ osdc = &rbd_dev->rbd_client->client->osdc; - req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, - false, GFP_NOIO, pages, bio); - if (!req) { - ret = -ENOMEM; - goto done_pages; + osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); + if (!osd_req) + return NULL; /* ENOMEM */ + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + rbd_assert(obj_request->bio_list != NULL); + osd_req->r_bio = obj_request->bio_list; + break; + case OBJ_REQUEST_PAGES: + osd_req->r_pages = obj_request->pages; + osd_req->r_num_pages = obj_request->page_count; + osd_req->r_page_alignment = offset & ~PAGE_MASK; + break; } - req->r_callback = rbd_cb; + if (write_request) { + osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + now = CURRENT_TIME; + mtime = &now; + } else { + osd_req->r_flags = CEPH_OSD_FLAG_READ; + mtime = NULL; /* not needed for reads */ + offset = 0; /* These are not used... */ + length = 0; /* ...for osd read requests */ + } - req_data->rq = rq; - req_data->bio = bio; - req_data->pages = pages; - req_data->len = len; + osd_req->r_callback = rbd_osd_req_callback; + osd_req->r_priv = obj_request; - req->r_priv = req_data; + osd_req->r_oid_len = strlen(obj_request->object_name); + rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); + memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + osd_req->r_file_layout = rbd_dev->layout; /* struct */ - strncpy(req->r_oid, object_name, sizeof(req->r_oid)); - req->r_oid_len = strlen(req->r_oid); + /* osd_req will get its own reference to snapc (if non-null) */ - layout = &req->r_file_layout; - memset(layout, 0, sizeof(*layout)); - layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_stripe_count = cpu_to_le32(1); - layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, - req, ops); - rbd_assert(ret == 0); + ceph_osdc_build_request(osd_req, offset, length, 1, op, + snapc, snap_id, mtime); - ceph_osdc_build_request(req, ofs, &len, - ops, - snapc, - &mtime, - req->r_oid, req->r_oid_len); + return osd_req; +} - if (linger_req) { - ceph_osdc_set_request_linger(osdc, req); - *linger_req = req; - } +static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) +{ + ceph_osdc_put_request(osd_req); +} - ret = ceph_osdc_start_request(osdc, req, false); - if (ret < 0) - goto done_err; - - if (!rbd_cb) { - ret = ceph_osdc_wait_request(osdc, req); - if (ver) - *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%llu\n", - (unsigned long long) - le64_to_cpu(req->r_reassert_version.version)); - ceph_osdc_put_request(req); +/* object_name is assumed to be a non-null pointer and NUL-terminated */ + +static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, + u64 offset, u64 length, + enum obj_request_type type) +{ + struct rbd_obj_request *obj_request; + size_t size; + char *name; + + rbd_assert(obj_request_type_valid(type)); + + size = strlen(object_name) + 1; + obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); + if (!obj_request) + return NULL; + + name = (char *)(obj_request + 1); + obj_request->object_name = memcpy(name, object_name, size); + obj_request->offset = offset; + obj_request->length = length; + obj_request->which = BAD_WHICH; + obj_request->type = type; + INIT_LIST_HEAD(&obj_request->links); + obj_request_done_init(obj_request); + init_completion(&obj_request->completion); + kref_init(&obj_request->kref); + + dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, + offset, length, (int)type, obj_request); + + return obj_request; +} + +static void rbd_obj_request_destroy(struct kref *kref) +{ + struct rbd_obj_request *obj_request; + + obj_request = container_of(kref, struct rbd_obj_request, kref); + + dout("%s: obj %p\n", __func__, obj_request); + + rbd_assert(obj_request->img_request == NULL); + rbd_assert(obj_request->which == BAD_WHICH); + + if (obj_request->osd_req) + rbd_osd_req_destroy(obj_request->osd_req); + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + if (obj_request->bio_list) + bio_chain_put(obj_request->bio_list); + break; + case OBJ_REQUEST_PAGES: + if (obj_request->pages) + ceph_release_page_vector(obj_request->pages, + obj_request->page_count); + break; } - return ret; -done_err: - bio_chain_put(req_data->bio); - ceph_osdc_put_request(req); -done_pages: - rbd_coll_end_req(req_data, ret, len); - kfree(req_data); - return ret; + kfree(obj_request); } /* - * Ceph osd op callback + * Caller is responsible for filling in the list of object requests + * that comprises the image request, and the Linux request pointer + * (if there is one). */ -static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) -{ - struct rbd_request *req_data = req->r_priv; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; - __s32 rc; - u64 bytes; - int read_op; - - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); - - dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", - (unsigned long long) bytes, read_op, (int) rc); - - if (rc == -ENOENT && read_op) { - zero_bio_chain(req_data->bio, 0); - rc = 0; - } else if (rc == 0 && read_op && bytes < req_data->len) { - zero_bio_chain(req_data->bio, bytes); - bytes = req_data->len; - } +static struct rbd_img_request *rbd_img_request_create( + struct rbd_device *rbd_dev, + u64 offset, u64 length, + bool write_request) +{ + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc = NULL; - rbd_coll_end_req(req_data, rc, bytes); + img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); + if (!img_request) + return NULL; - if (req_data->bio) - bio_chain_put(req_data->bio); + if (write_request) { + down_read(&rbd_dev->header_rwsem); + snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + if (WARN_ON(!snapc)) { + kfree(img_request); + return NULL; /* Shouldn't happen */ + } + } - ceph_osdc_put_request(req); - kfree(req_data); + img_request->rq = NULL; + img_request->rbd_dev = rbd_dev; + img_request->offset = offset; + img_request->length = length; + img_request->write_request = write_request; + if (write_request) + img_request->snapc = snapc; + else + img_request->snap_id = rbd_dev->spec->snap_id; + spin_lock_init(&img_request->completion_lock); + img_request->next_completion = 0; + img_request->callback = NULL; + img_request->obj_request_count = 0; + INIT_LIST_HEAD(&img_request->obj_requests); + kref_init(&img_request->kref); + + rbd_img_request_get(img_request); /* Avoid a warning */ + rbd_img_request_put(img_request); /* TEMPORARY */ + + dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, + write_request ? "write" : "read", offset, length, + img_request); + + return img_request; } -static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +static void rbd_img_request_destroy(struct kref *kref) { - ceph_osdc_put_request(req); + struct rbd_img_request *img_request; + struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; + + img_request = container_of(kref, struct rbd_img_request, kref); + + dout("%s: img %p\n", __func__, img_request); + + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_img_obj_request_del(img_request, obj_request); + rbd_assert(img_request->obj_request_count == 0); + + if (img_request->write_request) + ceph_put_snap_context(img_request->snapc); + + kfree(img_request); } -/* - * Do a synchronous ceph osd operation - */ -static int rbd_req_sync_op(struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - int flags, - struct ceph_osd_req_op *ops, - const char *object_name, - u64 ofs, u64 inbound_size, - char *inbound, - struct ceph_osd_request **linger_req, - u64 *ver) +static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, + struct bio *bio_list) { - int ret; - struct page **pages; - int num_pages; - - rbd_assert(ops != NULL); + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct rbd_obj_request *obj_request = NULL; + struct rbd_obj_request *next_obj_request; + unsigned int bio_offset; + u64 image_offset; + u64 resid; + u16 opcode; + + dout("%s: img %p bio %p\n", __func__, img_request, bio_list); + + opcode = img_request->write_request ? CEPH_OSD_OP_WRITE + : CEPH_OSD_OP_READ; + bio_offset = 0; + image_offset = img_request->offset; + rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); + resid = img_request->length; + rbd_assert(resid > 0); + while (resid) { + const char *object_name; + unsigned int clone_size; + struct ceph_osd_req_op *op; + u64 offset; + u64 length; + + object_name = rbd_segment_name(rbd_dev, image_offset); + if (!object_name) + goto out_unwind; + offset = rbd_segment_offset(rbd_dev, image_offset); + length = rbd_segment_length(rbd_dev, image_offset, resid); + obj_request = rbd_obj_request_create(object_name, + offset, length, + OBJ_REQUEST_BIO); + kfree(object_name); /* object request has its own copy */ + if (!obj_request) + goto out_unwind; + + rbd_assert(length <= (u64) UINT_MAX); + clone_size = (unsigned int) length; + obj_request->bio_list = bio_chain_clone_range(&bio_list, + &bio_offset, clone_size, + GFP_ATOMIC); + if (!obj_request->bio_list) + goto out_partial; - num_pages = calc_pages_for(ofs, inbound_size); - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + /* + * Build up the op to use in building the osd + * request. Note that the contents of the op are + * copied by rbd_osd_req_create(). + */ + op = rbd_osd_req_op_create(opcode, offset, length); + if (!op) + goto out_partial; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, + img_request->write_request, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_partial; + /* status and version are initially zero-filled */ + + rbd_img_obj_request_add(img_request, obj_request); + + image_offset += length; + resid -= length; + } - ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, - object_name, ofs, inbound_size, NULL, - pages, num_pages, - flags, - ops, - NULL, 0, - NULL, - linger_req, ver); - if (ret < 0) - goto done; + return 0; - if ((flags & CEPH_OSD_FLAG_READ) && inbound) - ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); +out_partial: + rbd_obj_request_put(obj_request); +out_unwind: + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_obj_request_put(obj_request); -done: - ceph_release_page_vector(pages, num_pages); - return ret; + return -ENOMEM; } -/* - * Do an asynchronous ceph osd operation - */ -static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - char *seg_name; - u64 seg_ofs; - u64 seg_len; - int ret; - struct ceph_osd_req_op *ops; - u32 payload_len; - int opcode; - int flags; - u64 snapid; - - seg_name = rbd_segment_name(rbd_dev, ofs); - if (!seg_name) - return -ENOMEM; - seg_len = rbd_segment_length(rbd_dev, ofs, len); - seg_ofs = rbd_segment_offset(rbd_dev, ofs); - - if (rq_data_dir(rq) == WRITE) { - opcode = CEPH_OSD_OP_WRITE; - flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; - snapid = CEPH_NOSNAP; - payload_len = seg_len; - } else { - opcode = CEPH_OSD_OP_READ; - flags = CEPH_OSD_FLAG_READ; - snapc = NULL; - snapid = rbd_dev->spec->snap_id; - payload_len = 0; +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + u32 which = obj_request->which; + bool more = true; + + img_request = obj_request->img_request; + + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_assert(img_request != NULL); + rbd_assert(img_request->rq != NULL); + rbd_assert(img_request->obj_request_count > 0); + rbd_assert(which != BAD_WHICH); + rbd_assert(which < img_request->obj_request_count); + rbd_assert(which >= img_request->next_completion); + + spin_lock_irq(&img_request->completion_lock); + if (which != img_request->next_completion) + goto out; + + for_each_obj_request_from(img_request, obj_request) { + unsigned int xferred; + int result; + + rbd_assert(more); + rbd_assert(which < img_request->obj_request_count); + + if (!obj_request_done_test(obj_request)) + break; + + rbd_assert(obj_request->xferred <= (u64) UINT_MAX); + xferred = (unsigned int) obj_request->xferred; + result = (int) obj_request->result; + if (result) + rbd_warn(NULL, "obj_request %s result %d xferred %u\n", + img_request->write_request ? "write" : "read", + result, xferred); + + more = blk_end_request(img_request->rq, result, xferred); + which++; } - ret = -ENOMEM; - ops = rbd_create_rw_ops(1, opcode, payload_len); - if (!ops) - goto done; + rbd_assert(more ^ (which == img_request->obj_request_count)); + img_request->next_completion = which; +out: + spin_unlock_irq(&img_request->completion_lock); - /* we've taken care of segment sizes earlier when we - cloned the bios. We should never have a segment - truncated at this point */ - rbd_assert(seg_len == len); - - ret = rbd_do_request(rq, rbd_dev, snapc, snapid, - seg_name, seg_ofs, seg_len, - bio, - NULL, 0, - flags, - ops, - coll, coll_index, - rbd_req_cb, 0, NULL); - - rbd_destroy_ops(ops); -done: - kfree(seg_name); - return ret; + if (!more) + rbd_img_request_complete(img_request); } -/* - * Request sync osd read - */ -static int rbd_req_sync_read(struct rbd_device *rbd_dev, - u64 snapid, - const char *object_name, - u64 ofs, u64 len, - char *buf, - u64 *ver) -{ - struct ceph_osd_req_op *ops; - int ret; +static int rbd_img_request_submit(struct rbd_img_request *img_request) +{ + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); - if (!ops) - return -ENOMEM; + dout("%s: img %p\n", __func__, img_request); + for_each_obj_request(img_request, obj_request) { + int ret; - ret = rbd_req_sync_op(rbd_dev, NULL, - snapid, - CEPH_OSD_FLAG_READ, - ops, object_name, ofs, len, buf, NULL, ver); - rbd_destroy_ops(ops); + obj_request->callback = rbd_img_obj_callback; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + return ret; + /* + * The image request has its own reference to each + * of its object requests, so we can safely drop the + * initial one here. + */ + rbd_obj_request_put(obj_request); + } - return ret; + return 0; } -/* - * Request sync osd watch - */ -static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, - u64 ver, - u64 notify_id) +static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, + u64 ver, u64 notify_id) { - struct ceph_osd_req_op *ops; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; + struct ceph_osd_client *osdc; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); - if (!ops) + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) return -ENOMEM; - ops[0].watch.ver = cpu_to_le64(ver); - ops[0].watch.cookie = notify_id; - ops[0].watch.flag = 0; + ret = -ENOMEM; + op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, - rbd_dev->header_name, 0, 0, NULL, - NULL, 0, - CEPH_OSD_FLAG_READ, - ops, - NULL, 0, - rbd_simple_req_cb, 0, NULL); + osdc = &rbd_dev->rbd_client->client->osdc; + obj_request->callback = rbd_obj_request_put; + ret = rbd_obj_request_submit(osdc, obj_request); +out: + if (ret) + rbd_obj_request_put(obj_request); - rbd_destroy_ops(ops); return ret; } @@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (!rbd_dev) return; - dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", + dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) - pr_warning(RBD_DRV_NAME "%d got notification but failed to " - " update snaps: %d\n", rbd_dev->major, rc); + rbd_warn(rbd_dev, "got notification but failed to " + " update snaps: %d\n", rc); - rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); + rbd_obj_notify_ack(rbd_dev, hver, notify_id); } /* - * Request sync osd watch + * Request sync osd watch/unwatch. The value of "start" determines + * whether a watch request is being initiated or torn down. */ -static int rbd_req_sync_watch(struct rbd_device *rbd_dev) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) { - struct ceph_osd_req_op *ops; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; + rbd_assert(start ^ !!rbd_dev->watch_event); + rbd_assert(start ^ !!rbd_dev->watch_request); - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, - (void *)rbd_dev, &rbd_dev->watch_event); - if (ret < 0) - goto fail; + if (start) { + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + rbd_assert(rbd_dev->watch_event != NULL); + } - ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 1; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) + goto out_cancel; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, + rbd_dev->header.obj_version, start); + if (!op) + goto out_cancel; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_cancel; + + if (start) + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); + else + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_cancel; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_cancel; + ret = obj_request->result; + if (ret) + goto out_cancel; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, - &rbd_dev->watch_request, NULL); + /* + * A watch request is set to linger, so the underlying osd + * request won't go away until we unregister it. We retain + * a pointer to the object request during that time (in + * rbd_dev->watch_request), so we'll keep a reference to + * it. We'll drop that reference (below) after we've + * unregistered it. + */ + if (start) { + rbd_dev->watch_request = obj_request; - if (ret < 0) - goto fail_event; + return 0; + } - rbd_destroy_ops(ops); - return 0; + /* We have successfully torn down the watch request */ -fail_event: + rbd_obj_request_put(rbd_dev->watch_request); + rbd_dev->watch_request = NULL; +out_cancel: + /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; -fail: - rbd_destroy_ops(ops); - return ret; -} + if (obj_request) + rbd_obj_request_put(obj_request); -/* - * Request sync osd unwatch - */ -static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) -{ - struct ceph_osd_req_op *ops; - int ret; - - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; - - ops[0].watch.ver = 0; - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 0; - - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, NULL, NULL); - - - rbd_destroy_ops(ops); - ceph_osdc_cancel_event(rbd_dev->watch_event); - rbd_dev->watch_event = NULL; return ret; } /* * Synchronous osd object method call */ -static int rbd_req_sync_exec(struct rbd_device *rbd_dev, +static int rbd_obj_method_sync(struct rbd_device *rbd_dev, const char *object_name, const char *class_name, const char *method_name, @@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, size_t outbound_size, char *inbound, size_t inbound_size, - int flags, - u64 *ver) + u64 *version) { - struct ceph_osd_req_op *ops; - int class_name_len = strlen(class_name); - int method_name_len = strlen(method_name); - int payload_size; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct ceph_osd_req_op *op; + struct page **pages; + u32 page_count; int ret; /* - * Any input parameters required by the method we're calling - * will be sent along with the class and method names as - * part of the message payload. That data and its size are - * supplied via the indata and indata_len fields (named from - * the perspective of the server side) in the OSD request - * operation. + * Method calls are ultimately read operations but they + * don't involve object data (so no offset or length). + * The result should placed into the inbound buffer + * provided. They also supply outbound data--parameters for + * the object method. Currently if this is present it will + * be a snapshot id. */ - payload_size = class_name_len + method_name_len + outbound_size; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); - if (!ops) - return -ENOMEM; + page_count = (u32) calc_pages_for(0, inbound_size); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); - ops[0].cls.class_name = class_name; - ops[0].cls.class_len = (__u8) class_name_len; - ops[0].cls.method_name = method_name; - ops[0].cls.method_len = (__u8) method_name_len; - ops[0].cls.argc = 0; - ops[0].cls.indata = outbound; - ops[0].cls.indata_len = outbound_size; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, 0, 0, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - flags, ops, - object_name, 0, inbound_size, inbound, - NULL, ver); + obj_request->pages = pages; + obj_request->page_count = page_count; - rbd_destroy_ops(ops); + op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, + method_name, outbound, outbound_size); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - dout("cls_exec returned %d\n", ret); - return ret; -} + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; -static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) -{ - struct rbd_req_coll *coll = - kzalloc(sizeof(struct rbd_req_coll) + - sizeof(struct rbd_req_status) * num_reqs, - GFP_ATOMIC); + ret = obj_request->result; + if (ret < 0) + goto out; + ret = 0; + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); - if (!coll) - return NULL; - coll->total = num_reqs; - kref_init(&coll->kref); - return coll; + return ret; } -/* - * block device queue callback - */ -static void rbd_rq_fn(struct request_queue *q) +static void rbd_request_fn(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; + bool read_only = rbd_dev->mapping.read_only; struct request *rq; + int result; while ((rq = blk_fetch_request(q))) { - struct bio *bio; - bool do_write; - unsigned int size; - u64 ofs; - int num_segs, cur_seg = 0; - struct rbd_req_coll *coll; - struct ceph_snap_context *snapc; - unsigned int bio_offset; - - dout("fetched request\n"); - - /* filter out block requests we don't understand */ - if ((rq->cmd_type != REQ_TYPE_FS)) { - __blk_end_request_all(rq, 0); - continue; - } + bool write_request = rq_data_dir(rq) == WRITE; + struct rbd_img_request *img_request; + u64 offset; + u64 length; + + /* Ignore any non-FS requests that filter through. */ - /* deduce our operation (read, write) */ - do_write = (rq_data_dir(rq) == WRITE); - if (do_write && rbd_dev->mapping.read_only) { - __blk_end_request_all(rq, -EROFS); + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + __blk_end_request_all(rq, 0); continue; } - spin_unlock_irq(q->queue_lock); + /* Ignore/skip any zero-length requests */ - down_read(&rbd_dev->header_rwsem); + offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; + length = (u64) blk_rq_bytes(rq); - if (!rbd_dev->exists) { - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); - up_read(&rbd_dev->header_rwsem); - dout("request for non-existent snapshot"); - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENXIO); + if (!length) { + dout("%s: zero-length request\n", __func__); + __blk_end_request_all(rq, 0); continue; } - snapc = ceph_get_snap_context(rbd_dev->header.snapc); - - up_read(&rbd_dev->header_rwsem); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - bio = rq->bio; + spin_unlock_irq(q->queue_lock); - dout("%s 0x%x bytes at 0x%llx\n", - do_write ? "write" : "read", - size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); + /* Disallow writes to a read-only device */ - num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); - if (num_segs <= 0) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, num_segs); - ceph_put_snap_context(snapc); - continue; + if (write_request) { + result = -EROFS; + if (read_only) + goto end_request; + rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } - coll = rbd_alloc_coll(num_segs); - if (!coll) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - ceph_put_snap_context(snapc); - continue; - } - - bio_offset = 0; - do { - u64 limit = rbd_segment_length(rbd_dev, ofs, size); - unsigned int chain_size; - struct bio *bio_chain; - - BUG_ON(limit > (u64) UINT_MAX); - chain_size = (unsigned int) limit; - dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - kref_get(&coll->kref); + /* + * Quit early if the mapped snapshot no longer + * exists. It's still possible the snapshot will + * have disappeared by the time our request arrives + * at the osd, but there's no sense in sending it if + * we already know. + */ + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { + dout("request for non-existent snapshot"); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + result = -ENXIO; + goto end_request; + } - /* Pass a cloned bio chain via an osd request */ + result = -EINVAL; + if (WARN_ON(offset && length > U64_MAX - offset + 1)) + goto end_request; /* Shouldn't happen */ - bio_chain = bio_chain_clone_range(&bio, - &bio_offset, chain_size, - GFP_ATOMIC); - if (bio_chain) - (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, chain_size, - bio_chain, coll, cur_seg); - else - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, chain_size); - size -= chain_size; - ofs += chain_size; + result = -ENOMEM; + img_request = rbd_img_request_create(rbd_dev, offset, length, + write_request); + if (!img_request) + goto end_request; - cur_seg++; - } while (size > 0); - kref_put(&coll->kref, rbd_coll_release); + img_request->rq = rq; + result = rbd_img_request_fill_bio(img_request, rq->bio); + if (!result) + result = rbd_img_request_submit(img_request); + if (result) + rbd_img_request_put(img_request); +end_request: spin_lock_irq(q->queue_lock); - - ceph_put_snap_context(snapc); + if (result < 0) { + rbd_warn(rbd_dev, "obj_request %s result %d\n", + write_request ? "write" : "read", result); + __blk_end_request_all(rq, result); + } } } @@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) put_disk(disk); } +static int rbd_obj_read_sync(struct rbd_device *rbd_dev, + const char *object_name, + u64 offset, u64 length, + char *buf, u64 *version) + +{ + struct ceph_osd_req_op *op; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct page **pages = NULL; + u32 page_count; + size_t size; + int ret; + + page_count = (u32) calc_pages_for(offset, length); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + ret = PTR_ERR(pages); + + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, offset, length, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; + + obj_request->pages = pages; + obj_request->page_count = page_count; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; + + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; + + ret = obj_request->result; + if (ret < 0) + goto out; + + rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); + size = (size_t) obj_request->xferred; + ceph_copy_from_page_vector(pages, buf, 0, size); + rbd_assert(size <= (size_t) INT_MAX); + ret = (int) size; + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); + + return ret; +} + /* * Read the complete header for the given rbd device. * @@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) if (!ondisk) return ERR_PTR(-ENOMEM); - ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, - rbd_dev->header_name, + ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 0, size, (char *) ondisk, version); - if (ret < 0) goto out_err; if (WARN_ON((size_t) ret < size)) { ret = -ENXIO; - pr_warning("short header read for image %s" - " (want %zd got %d)\n", - rbd_dev->spec->image_name, size, ret); + rbd_warn(rbd_dev, "short header read (want %zd got %d)", + size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; - pr_warning("invalid header for image %s\n", - rbd_dev->spec->image_name); + rbd_warn(rbd_dev, "invalid header"); goto out_err; } @@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - /* init rq */ - q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); if (!q) goto out_disk; @@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } -struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, +static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, struct rbd_spec *spec) { struct rbd_device *rbd_dev; @@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); + rbd_dev->flags = 0; INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); init_rwsem(&rbd_dev->header_rwsem); @@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + /* Initialize the layout used for all rbd requests */ + + rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); + rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + return rbd_dev; } @@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_size", (char *) &snapid, sizeof (snapid), - (char *) &size_buf, sizeof (size_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) &size_buf, sizeof (size_buf), NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_object_prefix", NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 incompat; int ret; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_features", (char *) &snapid, sizeof (snapid), (char *) &features_buf, sizeof (features_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) void *end; char *image_id; u64 overlap; - size_t len = 0; int ret; parent_spec = rbd_spec_alloc(); @@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(CEPH_NOSNAP); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", (char *) &snapid, sizeof (snapid), - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (parent_spec->pool_id == CEPH_NOPOOL) goto out; /* No parent? No problem. */ - image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + /* The ceph file layout needs to fit pool id in 32 bits */ + + ret = -EIO; + if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) + goto out; + + image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { ret = PTR_ERR(image_id); goto out_err; } parent_spec->image_id = image_id; - parent_spec->image_id_len = len; ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, out_err); @@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) rbd_assert(!rbd_dev->spec->image_name); - image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + len = strlen(rbd_dev->spec->image_id); + image_id_size = sizeof (__le32) + len; image_id = kmalloc(image_id_size, GFP_KERNEL); if (!image_id) return NULL; p = image_id; end = (char *) image_id + image_id_size; - ceph_encode_string(&p, end, rbd_dev->spec->image_id, - (u32) rbd_dev->spec->image_id_len); + ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; reply_buf = kmalloc(size, GFP_KERNEL); if (!reply_buf) goto out; - ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, "rbd", "dir_get_name", image_id, image_id_size, - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); + (char *) reply_buf, size, NULL); if (ret < 0) goto out; p = reply_buf; @@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) osdc = &rbd_dev->rbd_client->client->osdc; name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); - if (!name) - return -EIO; /* pool id too large (>= 2^31) */ + if (!name) { + rbd_warn(rbd_dev, "there is no pool with id %llu", + rbd_dev->spec->pool_id); /* Really a BUG() */ + return -EIO; + } rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); if (!rbd_dev->spec->pool_name) @@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) /* Fetch the image name; tolerate failure here */ name = rbd_dev_image_name(rbd_dev); - if (name) { - rbd_dev->spec->image_name_len = strlen(name); + if (name) rbd_dev->spec->image_name = (char *) name; - } else { - pr_warning(RBD_DRV_NAME "%d " - "unable to get image name for image id %s\n", - rbd_dev->major, rbd_dev->spec->image_id); - } + else + rbd_warn(rbd_dev, "unable to get image name"); /* Look up the snapshot name. */ name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); if (!name) { + rbd_warn(rbd_dev, "no snapshot with id %llu", + rbd_dev->spec->snap_id); /* Really a BUG() */ ret = -EIO; goto out_err; } @@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapcontext", NULL, 0, - reply_buf, size, - CEPH_OSD_FLAG_READ, ver); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, ver); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) return ERR_PTR(-ENOMEM); snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapshot_name", (char *) &snap_id, sizeof (snap_id), - reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2766,7 +3235,7 @@ out: static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, u64 *snap_size, u64 *snap_features) { - __le64 snap_id; + u64 snap_id; u8 order; int ret; @@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { struct list_head *next = links->next; - /* Existing snapshot not in the new snap context */ - + /* + * A previously-existing snapshot is not in + * the new snap context. + * + * If the now missing snapshot is the one the + * image is mapped to, clear its exists flag + * so we can avoid sending any more requests + * to it. + */ if (rbd_dev->spec->snap_id == snap->id) - rbd_dev->exists = false; + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->spec->snap_id == snap->id ? @@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) struct rbd_snap *snap; int ret = 0; - dout("%s called\n", __func__); + dout("%s:\n", __func__); if (WARN_ON(!device_is_registered(&rbd_dev->dev))) return -EIO; @@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) device_unregister(&rbd_dev->dev); } -static int rbd_init_watch_dev(struct rbd_device *rbd_dev) -{ - int ret, rc; - - do { - ret = rbd_req_sync_watch(rbd_dev); - if (ret == -ERANGE) { - rc = rbd_dev_refresh(rbd_dev, NULL); - if (rc < 0) - return rc; - } - } while (ret == -ERANGE); - - return ret; -} - static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); /* @@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) size_t len; len = next_token(buf); - dup = kmalloc(len + 1, GFP_KERNEL); + dup = kmemdup(*buf, len + 1, GFP_KERNEL); if (!dup) return NULL; - - memcpy(dup, *buf, len); *(dup + len) = '\0'; *buf += len; @@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf, /* The first four tokens are required */ len = next_token(&buf); - if (!len) - return -EINVAL; /* Missing monitor address(es) */ + if (!len) { + rbd_warn(NULL, "no monitor address(es) provided"); + return -EINVAL; + } mon_addrs = buf; mon_addrs_size = len + 1; buf += len; @@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf, options = dup_token(&buf, NULL); if (!options) return -ENOMEM; - if (!*options) - goto out_err; /* Missing options */ + if (!*options) { + rbd_warn(NULL, "no options provided"); + goto out_err; + } spec = rbd_spec_alloc(); if (!spec) @@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf, spec->pool_name = dup_token(&buf, NULL); if (!spec->pool_name) goto out_mem; - if (!*spec->pool_name) - goto out_err; /* Missing pool name */ + if (!*spec->pool_name) { + rbd_warn(NULL, "no pool name provided"); + goto out_err; + } - spec->image_name = dup_token(&buf, &spec->image_name_len); + spec->image_name = dup_token(&buf, NULL); if (!spec->image_name) goto out_mem; - if (!*spec->image_name) - goto out_err; /* Missing image name */ + if (!*spec->image_name) { + rbd_warn(NULL, "no image name provided"); + goto out_err; + } /* * Snapshot name is optional; default is to use "-" @@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf, ret = -ENAMETOOLONG; goto out_err; } - spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); if (!spec->snap_name) goto out_mem; - memcpy(spec->snap_name, buf, len); *(spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; + size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; @@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) goto out; } - ret = rbd_req_sync_exec(rbd_dev, object_name, + ret = rbd_obj_method_sync(rbd_dev, object_name, "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + response, RBD_IMAGE_ID_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->spec->image_id_len, - GFP_NOIO); + NULL, GFP_NOIO); if (IS_ERR(rbd_dev->spec->image_id)) { ret = PTR_ERR(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); + size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; @@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; @@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) if (ret) goto err_out_bus; - ret = rbd_init_watch_dev(rbd_dev); + ret = rbd_dev_header_watch_sync(rbd_dev, 1); if (ret) goto err_out_bus; @@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; + /* The ceph file layout needs to fit pool id in 32 bits */ + + if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { + rc = -EIO; + goto err_out_client; + } + rbd_dev = rbd_dev_create(rbdc, spec); if (!rbd_dev) goto err_out_client; @@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) { - struct ceph_client *client = rbd_dev->rbd_client->client; - - ceph_osdc_unregister_linger_request(&client->osdc, - rbd_dev->watch_request); - } if (rbd_dev->watch_event) - rbd_req_sync_unwatch(rbd_dev); - + rbd_dev_header_watch_sync(rbd_dev, 0); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - if (rbd_dev->open_count) { + spin_lock_irq(&rbd_dev->lock); + if (rbd_dev->open_count) ret = -EBUSY; + else + set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + if (ret < 0) goto done; - } rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void) device_unregister(&rbd_root_dev); } -int __init rbd_init(void) +static int __init rbd_init(void) { int rc; + if (!libceph_compatible(NULL)) { + rbd_warn(NULL, "libceph incompatibility (quitting)"); + + return -EINVAL; + } rc = rbd_sysfs_init(); if (rc) return rc; @@ -3793,7 +4263,7 @@ int __init rbd_init(void) return 0; } -void __exit rbd_exit(void) +static void __exit rbd_exit(void) { rbd_sysfs_cleanup(); } diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile new file mode 100644 index 000000000000..f35cd0b71f7b --- /dev/null +++ b/drivers/block/rsxx/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o +rsxx-y := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c new file mode 100644 index 000000000000..a295e7e9ee41 --- /dev/null +++ b/drivers/block/rsxx/config.c @@ -0,0 +1,213 @@ +/* +* Filename: config.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/types.h> +#include <linux/crc32.h> +#include <linux/swab.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +static void initialize_config(void *config) +{ + struct rsxx_card_cfg *cfg = config; + + cfg->hdr.version = RSXX_CFG_VERSION; + + cfg->data.block_size = RSXX_HW_BLK_SIZE; + cfg->data.stripe_size = RSXX_HW_BLK_SIZE; + cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.cache_order = (-1); + cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; + cfg->data.intr_coal.count = 0; + cfg->data.intr_coal.latency = 0; +} + +static u32 config_data_crc32(struct rsxx_card_cfg *cfg) +{ + /* + * Return the compliment of the CRC to ensure compatibility + * (i.e. this is how early rsxx drivers did it.) + */ + + return ~crc32(~0, &cfg->data, sizeof(cfg->data)); +} + + +/*----------------- Config Byte Swap Functions -------------------*/ +static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr) +{ + hdr->version = be32_to_cpu((__force __be32) hdr->version); + hdr->crc = be32_to_cpu((__force __be32) hdr->crc); +} + +static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr) +{ + hdr->version = (__force u32) cpu_to_be32(hdr->version); + hdr->crc = (__force u32) cpu_to_be32(hdr->crc); +} + +static void config_data_swab(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = swab32(data[i]); +} + +static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = le32_to_cpu((__force __le32) data[i]); +} + +static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = (__force u32) cpu_to_le32(data[i]); +} + + +/*----------------- Config Operations ------------------*/ +static int rsxx_save_config(struct rsxx_cardinfo *card) +{ + struct rsxx_card_cfg cfg; + int st; + + memcpy(&cfg, &card->config, sizeof(cfg)); + + if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) { + dev_err(CARD_TO_DEV(card), + "Cannot save config with invalid version %d\n", + cfg.hdr.version); + return -EINVAL; + } + + /* Convert data to little endian for the CRC calculation. */ + config_data_cpu_to_le(&cfg); + + cfg.hdr.crc = config_data_crc32(&cfg); + + /* + * Swap the data from little endian to big endian so it can be + * stored. + */ + config_data_swab(&cfg); + config_hdr_cpu_to_be(&cfg.hdr); + + st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1); + if (st) + return st; + + return 0; +} + +int rsxx_load_config(struct rsxx_cardinfo *card) +{ + int st; + u32 crc; + + st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config), + &card->config, 1); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed reading card config.\n"); + return st; + } + + config_hdr_be_to_cpu(&card->config.hdr); + + if (card->config.hdr.version == RSXX_CFG_VERSION) { + /* + * We calculate the CRC with the data in little endian, because + * early drivers did not take big endian CPUs into account. + * The data is always stored in big endian, so we need to byte + * swap it before calculating the CRC. + */ + + config_data_swab(&card->config); + + /* Check the CRC */ + crc = config_data_crc32(&card->config); + if (crc != card->config.hdr.crc) { + dev_err(CARD_TO_DEV(card), + "Config corruption detected!\n"); + dev_info(CARD_TO_DEV(card), + "CRC (sb x%08x is x%08x)\n", + card->config.hdr.crc, crc); + return -EIO; + } + + /* Convert the data to CPU byteorder */ + config_data_le_to_cpu(&card->config); + + } else if (card->config.hdr.version != 0) { + dev_err(CARD_TO_DEV(card), + "Invalid config version %d.\n", + card->config.hdr.version); + /* + * Config version changes require special handling from the + * user + */ + return -EINVAL; + } else { + dev_info(CARD_TO_DEV(card), + "Initializing card configuration.\n"); + initialize_config(card); + st = rsxx_save_config(card); + if (st) + return st; + } + + card->config_valid = 1; + + dev_dbg(CARD_TO_DEV(card), "version: x%08x\n", + card->config.hdr.version); + dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n", + card->config.hdr.crc); + dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n", + card->config.data.block_size); + dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n", + card->config.data.stripe_size); + dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n", + card->config.data.vendor_id); + dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n", + card->config.data.cache_order); + dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n", + card->config.data.intr_coal.mode); + dev_dbg(CARD_TO_DEV(card), "count: x%08x\n", + card->config.data.intr_coal.count); + dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n", + card->config.data.intr_coal.latency); + + return 0; +} + diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c new file mode 100644 index 000000000000..e5162487686a --- /dev/null +++ b/drivers/block/rsxx/core.c @@ -0,0 +1,649 @@ +/* +* Filename: core.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/bitops.h> + +#include <linux/genhd.h> +#include <linux/idr.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +#define NO_LEGACY 0 + +MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); +MODULE_AUTHOR("IBM <support@ramsan.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static unsigned int force_legacy = NO_LEGACY; +module_param(force_legacy, uint, 0444); +MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); + +static DEFINE_IDA(rsxx_disk_ida); +static DEFINE_SPINLOCK(rsxx_ida_lock); + +/*----------------- Interrupt Control & Handling -------------------*/ +static void __enable_intr(unsigned int *mask, unsigned int intr) +{ + *mask |= intr; +} + +static void __disable_intr(unsigned int *mask, unsigned int intr) +{ + *mask &= ~intr; +} + +/* + * NOTE: Disabling the IER will disable the hardware interrupt. + * Disabling the ISR will disable the software handling of the ISR bit. + * + * Enable/Disable interrupt functions assume the card->irq_lock + * is held by the caller. + */ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->isr_mask, intr); + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + __disable_intr(&card->isr_mask, intr); + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +static irqreturn_t rsxx_isr(int irq, void *pdata) +{ + struct rsxx_cardinfo *card = pdata; + unsigned int isr; + int handled = 0; + int reread_isr; + int i; + + spin_lock(&card->irq_lock); + + do { + reread_isr = 0; + + isr = ioread32(card->regmap + ISR); + if (isr == 0xffffffff) { + /* + * A few systems seem to have an intermittent issue + * where PCI reads return all Fs, but retrying the read + * a little later will return as expected. + */ + dev_info(CARD_TO_DEV(card), + "ISR = 0xFFFFFFFF, retrying later\n"); + break; + } + + isr &= card->isr_mask; + if (!isr) + break; + + for (i = 0; i < card->n_targets; i++) { + if (isr & CR_INTR_DMA(i)) { + if (card->ier_mask & CR_INTR_DMA(i)) { + rsxx_disable_ier(card, CR_INTR_DMA(i)); + reread_isr = 1; + } + queue_work(card->ctrl[i].done_wq, + &card->ctrl[i].dma_done_work); + handled++; + } + } + + if (isr & CR_INTR_CREG) { + schedule_work(&card->creg_ctrl.done_work); + handled++; + } + + if (isr & CR_INTR_EVENT) { + schedule_work(&card->event_work); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + handled++; + } + } while (reread_isr); + + spin_unlock(&card->irq_lock); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +/*----------------- Card Event Handler -------------------*/ +static char *rsxx_card_state_to_str(unsigned int state) +{ + static char *state_strings[] = { + "Unknown", "Shutdown", "Starting", "Formatting", + "Uninitialized", "Good", "Shutting Down", + "Fault", "Read Only Fault", "dStroying" + }; + + return state_strings[ffs(state)]; +} + +static void card_state_change(struct rsxx_cardinfo *card, + unsigned int new_state) +{ + int st; + + dev_info(CARD_TO_DEV(card), + "card state change detected.(%s -> %s)\n", + rsxx_card_state_to_str(card->state), + rsxx_card_state_to_str(new_state)); + + card->state = new_state; + + /* Don't attach DMA interfaces if the card has an invalid config */ + if (!card->config_valid) + return; + + switch (new_state) { + case CARD_STATE_RD_ONLY_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware has entered read-only mode!\n"); + /* + * Fall through so the DMA devices can be attached and + * the user can attempt to pull off their data. + */ + case CARD_STATE_GOOD: + st = rsxx_get_card_size8(card, &card->size8); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed attaching DMA devices\n"); + + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + break; + + case CARD_STATE_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware Fault reported!\n"); + /* Fall through. */ + + /* Everything else, detach DMA interface if it's attached. */ + case CARD_STATE_SHUTDOWN: + case CARD_STATE_STARTING: + case CARD_STATE_FORMATTING: + case CARD_STATE_UNINITIALIZED: + case CARD_STATE_SHUTTING_DOWN: + /* + * dStroy is a term coined by marketing to represent the low level + * secure erase. + */ + case CARD_STATE_DSTROYING: + set_capacity(card->gendisk, 0); + break; + } +} + +static void card_event_handler(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + unsigned int state; + unsigned long flags; + int st; + + card = container_of(work, struct rsxx_cardinfo, event_work); + + if (unlikely(card->halt)) + return; + + /* + * Enable the interrupt now to avoid any weird race conditions where a + * state change might occur while rsxx_get_card_state() is + * processing a returned creg cmd. + */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + st = rsxx_get_card_state(card, &state); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed reading state after event.\n"); + return; + } + + if (card->state != state) + card_state_change(card, state); + + if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +/*----------------- Card Operations -------------------*/ +static int card_shutdown(struct rsxx_cardinfo *card) +{ + unsigned int state; + signed long start; + const int timeout = msecs_to_jiffies(120000); + int st; + + /* We can't issue a shutdown if the card is in a transition state */ + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state == CARD_STATE_STARTING && + (jiffies - start < timeout)); + + if (state == CARD_STATE_STARTING) + return -ETIMEDOUT; + + /* Only issue a shutdown if we need to */ + if ((state != CARD_STATE_SHUTTING_DOWN) && + (state != CARD_STATE_SHUTDOWN)) { + st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN); + if (st) + return st; + } + + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state != CARD_STATE_SHUTDOWN && + (jiffies - start < timeout)); + + if (state != CARD_STATE_SHUTDOWN) + return -ETIMEDOUT; + + return 0; +} + +/*----------------- Driver Initialization & Setup -------------------*/ +/* Returns: 0 if the driver is compatible with the device + -1 if the driver is NOT compatible with the device */ +static int rsxx_compatibility_check(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + if (pci_rev > RS70_PCI_REV_SUPPORTED) + return -1; + return 0; +} + +static int rsxx_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct rsxx_cardinfo *card; + int st; + + dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); + + card = kzalloc(sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + + card->dev = dev; + pci_set_drvdata(dev, card); + + do { + if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) { + st = -ENOMEM; + goto failed_ida_get; + } + + spin_lock(&rsxx_ida_lock); + st = ida_get_new(&rsxx_disk_ida, &card->disk_id); + spin_unlock(&rsxx_ida_lock); + } while (st == -EAGAIN); + + if (st) + goto failed_ida_get; + + st = pci_enable_device(dev); + if (st) + goto failed_enable; + + pci_set_master(dev); + pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); + + st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (st) { + dev_err(CARD_TO_DEV(card), + "No usable DMA configuration,aborting\n"); + goto failed_dma_mask; + } + + st = pci_request_regions(dev, DRIVER_NAME); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed to request memory region\n"); + goto failed_request_regions; + } + + if (pci_resource_len(dev, 0) == 0) { + dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n"); + st = -ENOMEM; + goto failed_iomap; + } + + card->regmap = pci_iomap(dev, 0, 0); + if (!card->regmap) { + dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n"); + st = -ENOMEM; + goto failed_iomap; + } + + spin_lock_init(&card->irq_lock); + card->halt = 0; + + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + + if (!force_legacy) { + st = pci_enable_msi(dev); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed to enable MSI\n"); + } + + st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED, + DRIVER_NAME, card); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed requesting IRQ%d\n", dev->irq); + goto failed_irq; + } + + /************* Setup Processor Command Interface *************/ + rsxx_creg_setup(card); + + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG); + spin_unlock_irq(&card->irq_lock); + + st = rsxx_compatibility_check(card); + if (st) { + dev_warn(CARD_TO_DEV(card), + "Incompatible driver detected. Please update the driver.\n"); + st = -EINVAL; + goto failed_compatiblity_check; + } + + /************* Load Card Config *************/ + st = rsxx_load_config(card); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed loading card config\n"); + + /************* Setup DMA Engine *************/ + st = rsxx_get_num_targets(card, &card->n_targets); + if (st) + dev_info(CARD_TO_DEV(card), + "Failed reading the number of DMA targets\n"); + + card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL); + if (!card->ctrl) { + st = -ENOMEM; + goto failed_dma_setup; + } + + st = rsxx_dma_setup(card); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed to setup DMA engine\n"); + goto failed_dma_setup; + } + + /************* Setup Card Event Handler *************/ + INIT_WORK(&card->event_work, card_event_handler); + + st = rsxx_setup_dev(card); + if (st) + goto failed_create_dev; + + rsxx_get_card_state(card, &card->state); + + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + + /* + * Now that the DMA Engine and devices have been setup, + * we can enable the event interrupt(it kicks off actions in + * those layers so we couldn't enable it right away.) + */ + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irq(&card->irq_lock); + + if (card->state == CARD_STATE_SHUTDOWN) { + st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP); + if (st) + dev_crit(CARD_TO_DEV(card), + "Failed issuing card startup\n"); + } else if (card->state == CARD_STATE_GOOD || + card->state == CARD_STATE_RD_ONLY_FAULT) { + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + + rsxx_attach_dev(card); + + return 0; + +failed_create_dev: + rsxx_dma_destroy(card); +failed_dma_setup: +failed_compatiblity_check: + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + free_irq(dev->irq, card); + if (!force_legacy) + pci_disable_msi(dev); +failed_irq: + pci_iounmap(dev, card->regmap); +failed_iomap: + pci_release_regions(dev); +failed_request_regions: +failed_dma_mask: + pci_disable_device(dev); +failed_enable: + spin_lock(&rsxx_ida_lock); + ida_remove(&rsxx_disk_ida, card->disk_id); + spin_unlock(&rsxx_ida_lock); +failed_ida_get: + kfree(card); + + return st; +} + +static void rsxx_pci_remove(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int st; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), + "Removing PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + st = card_shutdown(card); + if (st) + dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n"); + + /* Sync outstanding event handlers. */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + + cancel_work_sync(&card->event_work); + + rsxx_destroy_dev(card); + rsxx_dma_destroy(card); + + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irqrestore(&card->irq_lock, flags); + free_irq(dev->irq, card); + + if (!force_legacy) + pci_disable_msi(dev); + + rsxx_creg_destroy(card); + + pci_iounmap(dev, card->regmap); + + pci_disable_device(dev); + pci_release_regions(dev); + + kfree(card); +} + +static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + /* We don't support suspend at this time. */ + return -ENOSYS; +} + +static void rsxx_pci_shutdown(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + card_shutdown(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {0,}, +}; + +MODULE_DEVICE_TABLE(pci, rsxx_pci_ids); + +static struct pci_driver rsxx_pci_driver = { + .name = DRIVER_NAME, + .id_table = rsxx_pci_ids, + .probe = rsxx_pci_probe, + .remove = rsxx_pci_remove, + .suspend = rsxx_pci_suspend, + .shutdown = rsxx_pci_shutdown, +}; + +static int __init rsxx_core_init(void) +{ + int st; + + st = rsxx_dev_init(); + if (st) + return st; + + st = rsxx_dma_init(); + if (st) + goto dma_init_failed; + + st = rsxx_creg_init(); + if (st) + goto creg_init_failed; + + return pci_register_driver(&rsxx_pci_driver); + +creg_init_failed: + rsxx_dma_cleanup(); +dma_init_failed: + rsxx_dev_cleanup(); + + return st; +} + +static void __exit rsxx_core_cleanup(void) +{ + pci_unregister_driver(&rsxx_pci_driver); + rsxx_creg_cleanup(); + rsxx_dma_cleanup(); + rsxx_dev_cleanup(); +} + +module_init(rsxx_core_init); +module_exit(rsxx_core_cleanup); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c new file mode 100644 index 000000000000..80bbe639fccd --- /dev/null +++ b/drivers/block/rsxx/cregs.c @@ -0,0 +1,758 @@ +/* +* Filename: cregs.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/completion.h> +#include <linux/slab.h> + +#include "rsxx_priv.h" + +#define CREG_TIMEOUT_MSEC 10000 + +typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st); + +struct creg_cmd { + struct list_head list; + creg_cmd_cb cb; + void *cb_private; + unsigned int op; + unsigned int addr; + int cnt8; + void *buf; + unsigned int stream; + unsigned int status; +}; + +static struct kmem_cache *creg_cmd_pool; + + +/*------------ Private Functions --------------*/ + +#if defined(__LITTLE_ENDIAN) +#define LITTLE_ENDIAN 1 +#elif defined(__BIG_ENDIAN) +#define LITTLE_ENDIAN 0 +#else +#error Unknown endianess!!! Aborting... +#endif + +static void copy_to_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + iowrite32be(data[i], card->regmap + CREG_DATA(i)); + else + iowrite32(data[i], card->regmap + CREG_DATA(i)); + } +} + + +static void copy_from_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + data[i] = ioread32be(card->regmap + CREG_DATA(i)); + else + data[i] = ioread32(card->regmap + CREG_DATA(i)); + } +} + +static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + + /* + * Spin lock is needed because this can be called in atomic/interrupt + * context. + */ + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + + return cmd; +} + +static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) +{ + iowrite32(cmd->addr, card->regmap + CREG_ADD); + iowrite32(cmd->cnt8, card->regmap + CREG_CNT); + + if (cmd->op == CREG_OP_WRITE) { + if (cmd->buf) + copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + } + + /* + * Data copy must complete before initiating the command. This is + * needed for weakly ordered processors (i.e. PowerPC), so that all + * neccessary registers are written before we kick the hardware. + */ + wmb(); + + /* Setting the valid bit will kick off the command. */ + iowrite32(cmd->op, card->regmap + CREG_CMD); +} + +static void creg_kick_queue(struct rsxx_cardinfo *card) +{ + if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue)) + return; + + card->creg_ctrl.active = 1; + card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue, + struct creg_cmd, list); + list_del(&card->creg_ctrl.active_cmd->list); + card->creg_ctrl.q_depth--; + + /* + * We have to set the timer before we push the new command. Otherwise, + * we could create a race condition that would occur if the timer + * was not canceled, and expired after the new command was pushed, + * but before the command was issued to hardware. + */ + mod_timer(&card->creg_ctrl.cmd_timer, + jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC)); + + creg_issue_cmd(card, card->creg_ctrl.active_cmd); +} + +static int creg_queue_cmd(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + creg_cmd_cb callback, + void *cb_private) +{ + struct creg_cmd *cmd; + + /* Don't queue stuff up if we're halted. */ + if (unlikely(card->halt)) + return -EINVAL; + + if (card->creg_ctrl.reset) + return -EAGAIN; + + if (cnt8 > MAX_CREG_DATA8) + return -EINVAL; + + cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + INIT_LIST_HEAD(&cmd->list); + + cmd->op = op; + cmd->addr = addr; + cmd->cnt8 = cnt8; + cmd->buf = buf; + cmd->stream = stream; + cmd->cb = callback; + cmd->cb_private = cb_private; + cmd->status = 0; + + spin_lock(&card->creg_ctrl.lock); + list_add_tail(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); + + return 0; +} + +static void creg_cmd_timed_out(unsigned long data) +{ + struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; + struct creg_cmd *cmd; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + card->creg_ctrl.creg_stats.creg_timeout++; + dev_warn(CARD_TO_DEV(card), + "No active command associated with timeout!\n"); + return; + } + + if (cmd->cb) + cmd->cb(card, cmd, -ETIMEDOUT); + + kmem_cache_free(creg_cmd_pool, cmd); + + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + + +static void creg_cmd_done(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + struct creg_cmd *cmd; + int st = 0; + + card = container_of(work, struct rsxx_cardinfo, + creg_ctrl.done_work); + + /* + * The timer could not be cancelled for some reason, + * race to pop the active command. + */ + if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) + card->creg_ctrl.creg_stats.failed_cancel_timer++; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + dev_err(CARD_TO_DEV(card), + "Spurious creg interrupt!\n"); + return; + } + + card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT); + cmd->status = card->creg_ctrl.creg_stats.stat; + if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) { + dev_err(CARD_TO_DEV(card), + "Invalid status on creg command\n"); + /* + * At this point we're probably reading garbage from HW. Don't + * do anything else that could mess up the system and let + * the sync function return an error. + */ + st = -EIO; + goto creg_done; + } else if (cmd->status & CREG_STAT_ERROR) { + st = -EIO; + } + + if ((cmd->op == CREG_OP_READ)) { + unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); + + /* Paranoid Sanity Checks */ + if (!cmd->buf) { + dev_err(CARD_TO_DEV(card), + "Buffer not given for read.\n"); + st = -EIO; + goto creg_done; + } + if (cnt8 != cmd->cnt8) { + dev_err(CARD_TO_DEV(card), + "count mismatch\n"); + st = -EIO; + goto creg_done; + } + + copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + } + +creg_done: + if (cmd->cb) + cmd->cb(card, cmd, st); + + kmem_cache_free(creg_cmd_pool, cmd); + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + +static void creg_reset(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + struct creg_cmd *tmp; + unsigned long flags; + + /* + * mutex_trylock is used here because if reset_lock is taken then a + * reset is already happening. So, we can just go ahead and return. + */ + if (!mutex_trylock(&card->creg_ctrl.reset_lock)) + return; + + card->creg_ctrl.reset = 1; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + dev_warn(CARD_TO_DEV(card), + "Resetting creg interface for recovery\n"); + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + card->creg_ctrl.q_depth--; + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + } + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + + card->creg_ctrl.active = 0; + } + spin_unlock(&card->creg_ctrl.lock); + + card->creg_ctrl.reset = 0; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + mutex_unlock(&card->creg_ctrl.reset_lock); +} + +/* Used for synchronous accesses */ +struct creg_completion { + struct completion *cmd_done; + int st; + u32 creg_status; +}; + +static void creg_cmd_done_cb(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + struct creg_completion *cmd_completion; + + cmd_completion = cmd->cb_private; + BUG_ON(!cmd_completion); + + cmd_completion->st = st; + cmd_completion->creg_status = cmd->status; + complete(cmd_completion->cmd_done); +} + +static int __issue_creg_rw(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + unsigned int *hw_stat) +{ + DECLARE_COMPLETION_ONSTACK(cmd_done); + struct creg_completion completion; + unsigned long timeout; + int st; + + completion.cmd_done = &cmd_done; + completion.st = 0; + completion.creg_status = 0; + + st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb, + &completion); + if (st) + return st; + + /* + * This timeout is neccessary for unresponsive hardware. The additional + * 20 seconds to used to guarantee that each cregs requests has time to + * complete. + */ + timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth) + 20000); + + /* + * The creg interface is guaranteed to complete. It has a timeout + * mechanism that will kick in if hardware does not respond. + */ + st = wait_for_completion_timeout(completion.cmd_done, timeout); + if (st == 0) { + /* + * This is really bad, because the kernel timer did not + * expire and notify us of a timeout! + */ + dev_crit(CARD_TO_DEV(card), + "cregs timer failed\n"); + creg_reset(card); + return -EIO; + } + + *hw_stat = completion.creg_status; + + if (completion.st) { + dev_warn(CARD_TO_DEV(card), + "creg command failed(%d x%08x)\n", + completion.st, addr); + return completion.st; + } + + return 0; +} + +static int issue_creg_rw(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int stream, + int read) +{ + unsigned int hw_stat; + unsigned int xfer; + unsigned int op; + int st; + + op = read ? CREG_OP_READ : CREG_OP_WRITE; + + do { + xfer = min_t(unsigned int, size8, MAX_CREG_DATA8); + + st = __issue_creg_rw(card, op, addr, xfer, + data, stream, &hw_stat); + if (st) + return st; + + data = (char *)data + xfer; + addr += xfer; + size8 -= xfer; + } while (size8); + + return 0; +} + +/* ---------------------------- Public API ---------------------------------- */ +int rsxx_creg_write(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 0); +} + +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 1); +} + +int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state) +{ + return rsxx_creg_read(card, CREG_ADD_CARD_STATE, + sizeof(*state), state, 0); +} + +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8) +{ + unsigned int size; + int st; + + st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE, + sizeof(size), &size, 0); + if (st) + return st; + + *size8 = (u64)size * RSXX_HW_BLK_SIZE; + return 0; +} + +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets) +{ + return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS, + sizeof(*n_targets), n_targets, 0); +} + +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities) +{ + return rsxx_creg_read(card, CREG_ADD_CAPABILITIES, + sizeof(*capabilities), capabilities, 0); +} + +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd) +{ + return rsxx_creg_write(card, CREG_ADD_CARD_CMD, + sizeof(cmd), &cmd, 0); +} + + +/*----------------- HW Log Functions -------------------*/ +static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len) +{ + static char level; + + /* + * New messages start with "<#>", where # is the log level. Messages + * that extend past the log buffer will use the previous level + */ + if ((len > 3) && (str[0] == '<') && (str[2] == '>')) { + level = str[1]; + str += 3; /* Skip past the log level. */ + len -= 3; + } + + switch (level) { + case '0': + dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '1': + dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '2': + dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '3': + dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '4': + dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '5': + dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '6': + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '7': + dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + default: + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + } +} + +/* + * The substrncpy function copies the src string (which includes the + * terminating '\0' character), up to the count into the dest pointer. + * Returns the number of bytes copied to dest. + */ +static int substrncpy(char *dest, const char *src, int count) +{ + int max_cnt = count; + + while (count) { + count--; + *dest = *src; + if (*dest == '\0') + break; + src++; + dest++; + } + return max_cnt - count; +} + + +static void read_hw_log_done(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + char *buf; + char *log_str; + int cnt; + int len; + int off; + + buf = cmd->buf; + off = 0; + + /* Failed getting the log message */ + if (st) + return; + + while (off < cmd->cnt8) { + log_str = &card->log.buf[card->log.buf_len]; + cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len); + len = substrncpy(log_str, &buf[off], cnt); + + off += len; + card->log.buf_len += len; + + /* + * Flush the log if we've hit the end of a message or if we've + * run out of buffer space. + */ + if ((log_str[len - 1] == '\0') || + (card->log.buf_len == LOG_BUF_SIZE8)) { + if (card->log.buf_len != 1) /* Don't log blank lines. */ + hw_log_msg(card, card->log.buf, + card->log.buf_len); + card->log.buf_len = 0; + } + + } + + if (cmd->status & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +int rsxx_read_hw_log(struct rsxx_cardinfo *card) +{ + int st; + + st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG, + sizeof(card->log.tmp), card->log.tmp, + 1, read_hw_log_done, NULL); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed getting log text\n"); + + return st; +} + +/*-------------- IOCTL REG Access ------------------*/ +static int issue_reg_cmd(struct rsxx_cardinfo *card, + struct rsxx_reg_access *cmd, + int read) +{ + unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE; + + return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data, + cmd->stream, &cmd->stat); +} + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read) +{ + struct rsxx_reg_access cmd; + int st; + + st = copy_from_user(&cmd, ucmd, sizeof(cmd)); + if (st) + return -EFAULT; + + if (cmd.cnt > RSXX_MAX_REG_CNT) + return -EFAULT; + + st = issue_reg_cmd(card, &cmd, read); + if (st) + return st; + + st = put_user(cmd.stat, &ucmd->stat); + if (st) + return -EFAULT; + + if (read) { + st = copy_to_user(ucmd->data, cmd.data, cmd.cnt); + if (st) + return -EFAULT; + } + + return 0; +} + +/*------------ Initialization & Setup --------------*/ +int rsxx_creg_setup(struct rsxx_cardinfo *card) +{ + card->creg_ctrl.active_cmd = NULL; + + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); + mutex_init(&card->creg_ctrl.reset_lock); + INIT_LIST_HEAD(&card->creg_ctrl.queue); + spin_lock_init(&card->creg_ctrl.lock); + setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, + (unsigned long) card); + + return 0; +} + +void rsxx_creg_destroy(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + struct creg_cmd *tmp; + int cnt = 0; + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + cnt++; + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Canceled %d queue creg commands\n", cnt); + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + dev_info(CARD_TO_DEV(card), + "Canceled active creg command\n"); + kmem_cache_free(creg_cmd_pool, cmd); + } + spin_unlock(&card->creg_ctrl.lock); + + cancel_work_sync(&card->creg_ctrl.done_work); +} + + +int rsxx_creg_init(void) +{ + creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN); + if (!creg_cmd_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_creg_cleanup(void) +{ + kmem_cache_destroy(creg_cmd_pool); +} diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c new file mode 100644 index 000000000000..4346d17d2949 --- /dev/null +++ b/drivers/block/rsxx/dev.c @@ -0,0 +1,367 @@ +/* +* Filename: dev.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> +#include <linux/bio.h> + +#include <linux/fs.h> + +#include "rsxx_priv.h" + +static unsigned int blkdev_minors = 64; +module_param(blkdev_minors, uint, 0444); +MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)"); + +/* + * For now I'm making this tweakable in case any applications hit this limit. + * If you see a "bio too big" error in the log you will need to raise this + * value. + */ +static unsigned int blkdev_max_hw_sectors = 1024; +module_param(blkdev_max_hw_sectors, uint, 0444); +MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO"); + +static unsigned int enable_blkdev = 1; +module_param(enable_blkdev , uint, 0444); +MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces"); + + +struct rsxx_bio_meta { + struct bio *bio; + atomic_t pending_dmas; + atomic_t error; + unsigned long start_time; +}; + +static struct kmem_cache *bio_meta_pool; + +/*----------------- Block Device Operations -----------------*/ +static int rsxx_blkdev_ioctl(struct block_device *bdev, + fmode_t mode, + unsigned int cmd, + unsigned long arg) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + + switch (cmd) { + case RSXX_GETREG: + return rsxx_reg_access(card, (void __user *)arg, 1); + case RSXX_SETREG: + return rsxx_reg_access(card, (void __user *)arg, 0); + } + + return -ENOTTY; +} + +static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + u64 blocks = card->size8 >> 9; + + /* + * get geometry: Fake it. I haven't found any drivers that set + * geo->start, so we won't either. + */ + if (card->size8) { + geo->heads = 64; + geo->sectors = 16; + do_div(blocks, (geo->heads * geo->sectors)); + geo->cylinders = blocks; + } else { + geo->heads = 0; + geo->sectors = 0; + geo->cylinders = 0; + } + return 0; +} + +static const struct block_device_operations rsxx_fops = { + .owner = THIS_MODULE, + .getgeo = rsxx_getgeo, + .ioctl = rsxx_blkdev_ioctl, +}; + +static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) +{ + struct hd_struct *part0 = &card->gendisk->part0; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_round_stats(cpu, part0); + part_inc_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void disk_stats_complete(struct rsxx_cardinfo *card, + struct bio *bio, + unsigned long start_time) +{ + struct hd_struct *part0 = &card->gendisk->part0; + unsigned long duration = jiffies - start_time; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio)); + part_stat_inc(cpu, part0, ios[rw]); + part_stat_add(cpu, part0, ticks[rw], duration); + + part_round_stats(cpu, part0); + part_dec_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void bio_dma_done_cb(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int error) +{ + struct rsxx_bio_meta *meta = cb_data; + + if (error) + atomic_set(&meta->error, 1); + + if (atomic_dec_and_test(&meta->pending_dmas)) { + disk_stats_complete(card, meta->bio, meta->start_time); + + bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); + kmem_cache_free(bio_meta_pool, meta); + } +} + +static void rsxx_make_request(struct request_queue *q, struct bio *bio) +{ + struct rsxx_cardinfo *card = q->queuedata; + struct rsxx_bio_meta *bio_meta; + int st = -EINVAL; + + might_sleep(); + + if (unlikely(card->halt)) { + st = -EFAULT; + goto req_err; + } + + if (unlikely(card->dma_fault)) { + st = (-EFAULT); + goto req_err; + } + + if (bio->bi_size == 0) { + dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); + goto req_err; + } + + bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); + if (!bio_meta) { + st = -ENOMEM; + goto req_err; + } + + bio_meta->bio = bio; + atomic_set(&bio_meta->error, 0); + atomic_set(&bio_meta->pending_dmas, 0); + bio_meta->start_time = jiffies; + + disk_stats_start(card, bio); + + dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", + bio_data_dir(bio) ? 'W' : 'R', bio_meta, + (u64)bio->bi_sector << 9, bio->bi_size); + + st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, + bio_dma_done_cb, bio_meta); + if (st) + goto queue_err; + + return; + +queue_err: + kmem_cache_free(bio_meta_pool, bio_meta); +req_err: + bio_endio(bio, st); +} + +/*----------------- Device Setup -------------------*/ +static bool rsxx_discard_supported(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + return (pci_rev >= RSXX_DISCARD_SUPPORT); +} + +static unsigned short rsxx_get_logical_block_size( + struct rsxx_cardinfo *card) +{ + u32 capabilities = 0; + int st; + + st = rsxx_get_card_capabilities(card, &capabilities); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed reading card capabilities register\n"); + + /* Earlier firmware did not have support for 512 byte accesses */ + if (capabilities & CARD_CAP_SUBPAGE_WRITES) + return 512; + else + return RSXX_HW_BLK_SIZE; +} + +int rsxx_attach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + /* The block device requires the stripe size from the config. */ + if (enable_blkdev) { + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + else + set_capacity(card->gendisk, 0); + add_disk(card->gendisk); + + card->bdev_attached = 1; + } + + mutex_unlock(&card->dev_lock); + + return 0; +} + +void rsxx_detach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + if (card->bdev_attached) { + del_gendisk(card->gendisk); + card->bdev_attached = 0; + } + + mutex_unlock(&card->dev_lock); +} + +int rsxx_setup_dev(struct rsxx_cardinfo *card) +{ + unsigned short blk_size; + + mutex_init(&card->dev_lock); + + if (!enable_blkdev) + return 0; + + card->major = register_blkdev(0, DRIVER_NAME); + if (card->major < 0) { + dev_err(CARD_TO_DEV(card), "Failed to get major number\n"); + return -ENOMEM; + } + + card->queue = blk_alloc_queue(GFP_KERNEL); + if (!card->queue) { + dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + card->gendisk = alloc_disk(blkdev_minors); + if (!card->gendisk) { + dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + blk_size = rsxx_get_logical_block_size(card); + + blk_queue_make_request(card->queue, rsxx_make_request); + blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); + blk_queue_dma_alignment(card->queue, blk_size - 1); + blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); + blk_queue_logical_block_size(card->queue, blk_size); + blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); + + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); + if (rsxx_discard_supported(card)) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_max_discard_sectors(card->queue, + RSXX_HW_BLK_SIZE >> 9); + card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_zeroes_data = 1; + } + + card->queue->queuedata = card; + + snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), + "rsxx%d", card->disk_id); + card->gendisk->driverfs_dev = &card->dev->dev; + card->gendisk->major = card->major; + card->gendisk->first_minor = 0; + card->gendisk->fops = &rsxx_fops; + card->gendisk->private_data = card; + card->gendisk->queue = card->queue; + + return 0; +} + +void rsxx_destroy_dev(struct rsxx_cardinfo *card) +{ + if (!enable_blkdev) + return; + + put_disk(card->gendisk); + card->gendisk = NULL; + + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); +} + +int rsxx_dev_init(void) +{ + bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN); + if (!bio_meta_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_dev_cleanup(void) +{ + kmem_cache_destroy(bio_meta_pool); +} + + diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c new file mode 100644 index 000000000000..63176e67662f --- /dev/null +++ b/drivers/block/rsxx/dma.c @@ -0,0 +1,998 @@ +/* +* Filename: dma.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/slab.h> +#include "rsxx_priv.h" + +struct rsxx_dma { + struct list_head list; + u8 cmd; + unsigned int laddr; /* Logical address on the ramsan */ + struct { + u32 off; + u32 cnt; + } sub_page; + dma_addr_t dma_addr; + struct page *page; + unsigned int pg_off; /* Page Offset */ + rsxx_dma_cb cb; + void *cb_data; +}; + +/* This timeout is used to detect a stalled DMA channel */ +#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000) + +struct hw_status { + u8 status; + u8 tag; + __le16 count; + __le32 _rsvd2; + __le64 _rsvd3; +} __packed; + +enum rsxx_dma_status { + DMA_SW_ERR = 0x1, + DMA_HW_FAULT = 0x2, + DMA_CANCELLED = 0x4, +}; + +struct hw_cmd { + u8 command; + u8 tag; + u8 _rsvd; + u8 sub_page; /* Bit[0:2]: 512byte offset */ + /* Bit[4:6]: 512byte count */ + __le32 device_addr; + __le64 host_addr; +} __packed; + +enum rsxx_hw_cmd { + HW_CMD_BLK_DISCARD = 0x70, + HW_CMD_BLK_WRITE = 0x80, + HW_CMD_BLK_READ = 0xC0, + HW_CMD_BLK_RECON_READ = 0xE0, +}; + +enum rsxx_hw_status { + HW_STATUS_CRC = 0x01, + HW_STATUS_HARD_ERR = 0x02, + HW_STATUS_SOFT_ERR = 0x04, + HW_STATUS_FAULT = 0x08, +}; + +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + +static struct kmem_cache *rsxx_dma_pool; + +struct dma_tracker { + int next_tag; + struct rsxx_dma *dma; +}; + +#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ + (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) + +struct dma_tracker_list { + spinlock_t lock; + int head; + struct dma_tracker list[0]; +}; + + +/*----------------- Misc Utility Functions -------------------*/ +static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card) +{ + unsigned long long tgt_addr8; + + tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) & + card->_stripe.upper_mask) | + ((addr8) & card->_stripe.lower_mask); + do_div(tgt_addr8, RSXX_HW_BLK_SIZE); + return tgt_addr8; +} + +static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) +{ + unsigned int tgt; + + tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask; + + return tgt; +} + +static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +{ + /* Reset all DMA Command/Status Queues */ + iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); +} + +static unsigned int get_dma_size(struct rsxx_dma *dma) +{ + if (dma->sub_page.cnt) + return dma->sub_page.cnt << 9; + else + return RSXX_HW_BLK_SIZE; +} + + +/*----------------- DMA Tracker -------------------*/ +static void set_tracker_dma(struct dma_tracker_list *trackers, + int tag, + struct rsxx_dma *dma) +{ + trackers->list[tag].dma = dma; +} + +static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers, + int tag) +{ + return trackers->list[tag].dma; +} + +static int pop_tracker(struct dma_tracker_list *trackers) +{ + int tag; + + spin_lock(&trackers->lock); + tag = trackers->head; + if (tag != -1) { + trackers->head = trackers->list[tag].next_tag; + trackers->list[tag].next_tag = -1; + } + spin_unlock(&trackers->lock); + + return tag; +} + +static void push_tracker(struct dma_tracker_list *trackers, int tag) +{ + spin_lock(&trackers->lock); + trackers->list[tag].next_tag = trackers->head; + trackers->head = tag; + trackers->list[tag].dma = NULL; + spin_unlock(&trackers->lock); +} + + +/*----------------- Interrupt Coalescing -------------*/ +/* + * Interrupt Coalescing Register Format: + * Interrupt Timer (64ns units) [15:0] + * Interrupt Count [24:16] + * Reserved [31:25] +*/ +#define INTR_COAL_LATENCY_MASK (0x0000ffff) + +#define INTR_COAL_COUNT_SHIFT 16 +#define INTR_COAL_COUNT_BITS 9 +#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \ + INTR_COAL_COUNT_SHIFT) +#define INTR_COAL_LATENCY_UNITS_NS 64 + + +static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency) +{ + u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS; + + if (mode == RSXX_INTR_COAL_DISABLED) + return 0; + + return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) | + (latency_units & INTR_COAL_LATENCY_MASK); + +} + +static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) +{ + int i; + u32 q_depth = 0; + u32 intr_coal; + + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + return; + + for (i = 0; i < card->n_targets; i++) + q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth); + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + q_depth / 2, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); +} + +/*----------------- RSXX DMA Handling -------------------*/ +static void rsxx_complete_dma(struct rsxx_cardinfo *card, + struct rsxx_dma *dma, + unsigned int status) +{ + if (status & DMA_SW_ERR) + printk_ratelimited(KERN_ERR + "SW Error in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_HW_FAULT) + printk_ratelimited(KERN_ERR + "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_CANCELLED) + printk_ratelimited(KERN_ERR + "DMA Cancelled(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + + if (dma->cb) + dma->cb(card, dma->cb_data, status ? 1 : 0); + + kmem_cache_free(rsxx_dma_pool, dma); +} + +static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma) +{ + /* + * Requeued DMAs go to the front of the queue so they are issued + * first. + */ + spin_lock(&ctrl->queue_lock); + list_add(&dma->list, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); +} + +static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma, + u8 hw_st) +{ + unsigned int status = 0; + int requeue_cmd = 0; + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n", + dma->cmd, dma->laddr, hw_st); + + if (hw_st & HW_STATUS_CRC) + ctrl->stats.crc_errors++; + if (hw_st & HW_STATUS_HARD_ERR) + ctrl->stats.hard_errors++; + if (hw_st & HW_STATUS_SOFT_ERR) + ctrl->stats.soft_errors++; + + switch (dma->cmd) { + case HW_CMD_BLK_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + if (ctrl->card->scrub_hard) { + dma->cmd = HW_CMD_BLK_RECON_READ; + requeue_cmd = 1; + ctrl->stats.reads_retried++; + } else { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + } else if (hw_st & HW_STATUS_FAULT) { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_RECON_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + /* Data could not be reconstructed. */ + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_WRITE: + status |= DMA_HW_FAULT; + ctrl->stats.writes_failed++; + + break; + case HW_CMD_BLK_DISCARD: + status |= DMA_HW_FAULT; + ctrl->stats.discards_failed++; + + break; + default: + dev_err(CARD_TO_DEV(ctrl->card), + "Unknown command in DMA!(cmd: x%02x " + "laddr x%08x st: x%02x\n", + dma->cmd, dma->laddr, hw_st); + status |= DMA_SW_ERR; + + break; + } + + if (requeue_cmd) + rsxx_requeue_dma(ctrl, dma); + else + rsxx_complete_dma(ctrl->card, dma, status); +} + +static void dma_engine_stalled(unsigned long data) +{ + struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + return; + + if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { + /* + * The dma engine was stalled because the SW_CMD_IDX write + * was lost. Issue it again to recover. + */ + dev_warn(CARD_TO_DEV(ctrl->card), + "SW_CMD_IDX write was lost, re-writing...\n"); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + } else { + dev_warn(CARD_TO_DEV(ctrl->card), + "DMA channel %d has stalled, faulting interface.\n", + ctrl->id); + ctrl->card->dma_fault = 1; + } +} + +static void rsxx_issue_dmas(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int tag; + int cmds_pending = 0; + struct hw_cmd *hw_cmd_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); + hw_cmd_buf = ctrl->cmd.buf; + + if (unlikely(ctrl->card->halt)) + return; + + while (1) { + spin_lock(&ctrl->queue_lock); + if (list_empty(&ctrl->queue)) { + spin_unlock(&ctrl->queue_lock); + break; + } + spin_unlock(&ctrl->queue_lock); + + tag = pop_tracker(ctrl->trackers); + if (tag == -1) + break; + + spin_lock(&ctrl->queue_lock); + dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); + list_del(&dma->list); + ctrl->stats.sw_q_depth--; + spin_unlock(&ctrl->queue_lock); + + /* + * This will catch any DMAs that slipped in right before the + * fault, but was queued after all the other DMAs were + * cancelled. + */ + if (unlikely(ctrl->card->dma_fault)) { + push_tracker(ctrl->trackers, tag); + rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + continue; + } + + set_tracker_dma(ctrl->trackers, tag, dma); + hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; + hw_cmd_buf[ctrl->cmd.idx].tag = tag; + hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0; + hw_cmd_buf[ctrl->cmd.idx].sub_page = + ((dma->sub_page.cnt & 0x7) << 4) | + (dma->sub_page.off & 0x7); + + hw_cmd_buf[ctrl->cmd.idx].device_addr = + cpu_to_le32(dma->laddr); + + hw_cmd_buf[ctrl->cmd.idx].host_addr = + cpu_to_le64(dma->dma_addr); + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Issue DMA%d(laddr %d tag %d) to idx %d\n", + ctrl->id, dma->laddr, tag, ctrl->cmd.idx); + + ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK; + cmds_pending++; + + if (dma->cmd == HW_CMD_BLK_WRITE) + ctrl->stats.writes_issued++; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + ctrl->stats.discards_issued++; + else + ctrl->stats.reads_issued++; + } + + /* Let HW know we've queued commands. */ + if (cmds_pending) { + /* + * We must guarantee that the CPU writes to 'ctrl->cmd.buf' + * (which is in PCI-consistent system-memory) from the loop + * above make it into the coherency domain before the + * following PIO "trigger" updating the cmd.idx. A WMB is + * sufficient. We need not explicitly CPU cache-flush since + * the memory is a PCI-consistent (ie; coherent) mapping. + */ + wmb(); + + atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + } +} + +static void rsxx_dma_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + unsigned long flags; + u16 count; + u8 status; + u8 tag; + struct hw_status *hw_st_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + hw_st_buf = ctrl->status.buf; + + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->dma_fault)) + return; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + + while (count == ctrl->e_cnt) { + /* + * The read memory-barrier is necessary to keep aggressive + * processors/optimizers (such as the PPC Apple G5) from + * reordering the following status-buffer tag & status read + * *before* the count read on subsequent iterations of the + * loop! + */ + rmb(); + + status = hw_st_buf[ctrl->status.idx].status; + tag = hw_st_buf[ctrl->status.idx].tag; + + dma = get_tracker_dma(ctrl->trackers, tag); + if (dma == NULL) { + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + dev_err(CARD_TO_DEV(ctrl->card), + "No tracker for tag %d " + "(idx %d id %d)\n", + tag, ctrl->status.idx, ctrl->id); + return; + } + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Completing DMA%d" + "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n", + ctrl->id, dma->laddr, tag, status, count, + ctrl->status.idx); + + atomic_dec(&ctrl->stats.hw_q_depth); + + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + + if (status) + rsxx_handle_dma_error(ctrl, dma, status); + else + rsxx_complete_dma(ctrl->card, dma, 0); + + push_tracker(ctrl->trackers, tag); + + ctrl->status.idx = (ctrl->status.idx + 1) & + RSXX_CS_IDX_MASK; + ctrl->e_cnt++; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + } + + dma_intr_coal_auto_tune(ctrl->card); + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + del_timer_sync(&ctrl->activity_timer); + + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + spin_lock(&ctrl->queue_lock); + if (ctrl->stats.sw_q_depth) + queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); + spin_unlock(&ctrl->queue_lock); +} + +static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + + return cnt; +} + +static int rsxx_queue_discard(struct rsxx_cardinfo *card, + struct list_head *q, + unsigned int laddr, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->cmd = HW_CMD_BLK_DISCARD; + dma->laddr = laddr; + dma->dma_addr = 0; + dma->sub_page.off = 0; + dma->sub_page.cnt = 0; + dma->page = NULL; + dma->pg_off = 0; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr); + + list_add_tail(&dma->list, q); + + return 0; +} + +static int rsxx_queue_dma(struct rsxx_cardinfo *card, + struct list_head *q, + int dir, + unsigned int dma_off, + unsigned int dma_len, + unsigned int laddr, + struct page *page, + unsigned int pg_off, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len, + dir ? PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + + dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; + dma->laddr = laddr; + dma->sub_page.off = (dma_off >> 9); + dma->sub_page.cnt = (dma_len >> 9); + dma->page = page; + dma->pg_off = pg_off; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), + "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n", + dir ? 'W' : 'R', dma->laddr, dma->sub_page.off, + dma->sub_page.cnt, dma->page, dma->pg_off); + + /* Queue the DMA */ + list_add_tail(&dma->list, q); + + return 0; +} + +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data) +{ + struct list_head dma_list[RSXX_MAX_TARGETS]; + struct bio_vec *bvec; + unsigned long long addr8; + unsigned int laddr; + unsigned int bv_len; + unsigned int bv_off; + unsigned int dma_off; + unsigned int dma_len; + int dma_cnt[RSXX_MAX_TARGETS]; + int tgt; + int st; + int i; + + addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ + atomic_set(n_dmas, 0); + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&dma_list[i]); + dma_cnt[i] = 0; + } + + if (bio->bi_rw & REQ_DISCARD) { + bv_len = bio->bi_size; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + + st = rsxx_queue_discard(card, &dma_list[tgt], laddr, + cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += RSXX_HW_BLK_SIZE; + bv_len -= RSXX_HW_BLK_SIZE; + } + } else { + bio_for_each_segment(bvec, bio, i) { + bv_len = bvec->bv_len; + bv_off = bvec->bv_offset; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + dma_off = addr8 & RSXX_HW_BLK_MASK; + dma_len = min(bv_len, + RSXX_HW_BLK_SIZE - dma_off); + + st = rsxx_queue_dma(card, &dma_list[tgt], + bio_data_dir(bio), + dma_off, dma_len, + laddr, bvec->bv_page, + bv_off, cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += dma_len; + bv_off += dma_len; + bv_len -= dma_len; + } + } + } + + for (i = 0; i < card->n_targets; i++) { + if (!list_empty(&dma_list[i])) { + spin_lock(&card->ctrl[i].queue_lock); + card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; + list_splice_tail(&dma_list[i], &card->ctrl[i].queue); + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + } + + return 0; + +bvec_err: + for (i = 0; i < card->n_targets; i++) + rsxx_cleanup_dma_queue(card, &dma_list[i]); + + return st; +} + + +/*----------------- DMA Engine Initialization & Setup -------------------*/ +static int rsxx_dma_ctrl_init(struct pci_dev *dev, + struct rsxx_dma_ctrl *ctrl) +{ + int i; + + memset(&ctrl->stats, 0, sizeof(ctrl->stats)); + + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); + if (!ctrl->trackers) + return -ENOMEM; + + ctrl->trackers->head = 0; + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + ctrl->trackers->list[i].next_tag = i + 1; + ctrl->trackers->list[i].dma = NULL; + } + ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1; + spin_lock_init(&ctrl->trackers->lock); + + spin_lock_init(&ctrl->queue_lock); + INIT_LIST_HEAD(&ctrl->queue); + + setup_timer(&ctrl->activity_timer, dma_engine_stalled, + (unsigned long)ctrl); + + ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0); + if (!ctrl->issue_wq) + return -ENOMEM; + + ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0); + if (!ctrl->done_wq) + return -ENOMEM; + + INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); + INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + wmb(); + + return 0; +} + +static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, + unsigned int stripe_size8) +{ + if (!is_power_of_2(stripe_size8)) { + dev_err(CARD_TO_DEV(card), + "stripe_size is NOT a power of 2!\n"); + return -EINVAL; + } + + card->_stripe.lower_mask = stripe_size8 - 1; + + card->_stripe.upper_mask = ~(card->_stripe.lower_mask); + card->_stripe.upper_shift = ffs(card->n_targets) - 1; + + card->_stripe.target_mask = card->n_targets - 1; + card->_stripe.target_shift = ffs(stripe_size8) - 1; + + dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n", + card->_stripe.lower_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n", + card->_stripe.upper_shift); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n", + card->_stripe.upper_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n", + card->_stripe.target_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n", + card->_stripe.target_shift); + + return 0; +} + +static int rsxx_dma_configure(struct rsxx_cardinfo *card) +{ + u32 intr_coal; + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + card->config.data.intr_coal.count, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); + + return rsxx_dma_stripe_setup(card, card->config.data.stripe_size); +} + +int rsxx_dma_setup(struct rsxx_cardinfo *card) +{ + unsigned long flags; + int st; + int i; + + dev_info(CARD_TO_DEV(card), + "Initializing %d DMA targets\n", + card->n_targets); + + /* Regmap is divided up into 4K chunks. One for each DMA channel */ + for (i = 0; i < card->n_targets; i++) + card->ctrl[i].regmap = card->regmap + (i * 4096); + + card->dma_fault = 0; + + /* Reset the DMA queues */ + rsxx_dma_queue_reset(card); + + /************* Setup DMA Control *************/ + for (i = 0; i < card->n_targets; i++) { + st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]); + if (st) + goto failed_dma_setup; + + card->ctrl[i].card = card; + card->ctrl[i].id = i; + } + + card->scrub_hard = 1; + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Enable the interrupts after all setup has completed. */ + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + return 0; + +failed_dma_setup: + for (i = 0; i < card->n_targets; i++) { + struct rsxx_dma_ctrl *ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (ctrl->trackers) + vfree(ctrl->trackers); + + if (ctrl->status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); + if (ctrl->cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } + + return st; +} + + +void rsxx_dma_destroy(struct rsxx_cardinfo *card) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int i, j; + int cnt = 0; + + for (i = 0; i < card->n_targets; i++) { + ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (timer_pending(&ctrl->activity_timer)) + del_timer_sync(&ctrl->activity_timer); + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, i); + + /* Clean up issued DMAs */ + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(ctrl->trackers, j); + if (dma) { + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d pending DMAs on channel %d\n", + cnt, i); + + vfree(ctrl->trackers); + + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } +} + + +int rsxx_dma_init(void) +{ + rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); + if (!rsxx_dma_pool) + return -ENOMEM; + + return 0; +} + + +void rsxx_dma_cleanup(void) +{ + kmem_cache_destroy(rsxx_dma_pool); +} + diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h new file mode 100644 index 000000000000..2e50b65902b7 --- /dev/null +++ b/drivers/block/rsxx/rsxx.h @@ -0,0 +1,45 @@ +/* +* Filename: rsxx.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_H__ +#define __RSXX_H__ + +/*----------------- IOCTL Definitions -------------------*/ + +struct rsxx_reg_access { + __u32 addr; + __u32 cnt; + __u32 stat; + __u32 stream; + __u32 data[8]; +}; + +#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) + +#define RSXX_IOC_MAGIC 'r' + +#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access) +#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access) + +#endif /* __RSXX_H_ */ diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h new file mode 100644 index 000000000000..c025fe5fdb70 --- /dev/null +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -0,0 +1,72 @@ +/* +* Filename: rsXX_cfg.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_CFG_H__ +#define __RSXX_CFG_H__ + +/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */ +#include <linux/types.h> + +/* + * The card config version must match the driver's expected version. If it does + * not, the DMA interfaces will not be attached and the user will need to + * initialize/upgrade the card configuration using the card config utility. + */ +#define RSXX_CFG_VERSION 4 + +struct card_cfg_hdr { + __u32 version; + __u32 crc; +}; + +struct card_cfg_data { + __u32 block_size; + __u32 stripe_size; + __u32 vendor_id; + __u32 cache_order; + struct { + __u32 mode; /* Disabled, manual, auto-tune... */ + __u32 count; /* Number of intr to coalesce */ + __u32 latency;/* Max wait time (in ns) */ + } intr_coal; +}; + +struct rsxx_card_cfg { + struct card_cfg_hdr hdr; + struct card_cfg_data data; +}; + +/* Vendor ID Values */ +#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_DSI 1 +#define RSXX_VENDOR_COUNT 2 + +/* Interrupt Coalescing Values */ +#define RSXX_INTR_COAL_DISABLED 0 +#define RSXX_INTR_COAL_EXPLICIT 1 +#define RSXX_INTR_COAL_AUTO_TUNE 2 + + +#endif /* __RSXX_CFG_H__ */ + diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h new file mode 100644 index 000000000000..a1ac907d8f4c --- /dev/null +++ b/drivers/block/rsxx/rsxx_priv.h @@ -0,0 +1,399 @@ +/* +* Filename: rsxx_priv.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_PRIV_H__ +#define __RSXX_PRIV_H__ + +#include <linux/version.h> +#include <linux/semaphore.h> + +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <linux/workqueue.h> +#include <linux/bio.h> +#include <linux/vmalloc.h> +#include <linux/timer.h> +#include <linux/ioctl.h> + +#include "rsxx.h" +#include "rsxx_cfg.h" + +struct proc_cmd; + +#define PCI_VENDOR_ID_TMS_IBM 0x15B6 +#define PCI_DEVICE_ID_RS70_FLASH 0x0019 +#define PCI_DEVICE_ID_RS70D_FLASH 0x001A +#define PCI_DEVICE_ID_RS80_FLASH 0x001C +#define PCI_DEVICE_ID_RS81_FLASH 0x001E + +#define RS70_PCI_REV_SUPPORTED 4 + +#define DRIVER_NAME "rsxx" +#define DRIVER_VERSION "3.7" + +/* Block size is 4096 */ +#define RSXX_HW_BLK_SHIFT 12 +#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT) +#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1) + +#define MAX_CREG_DATA8 32 +#define LOG_BUF_SIZE8 128 + +#define RSXX_MAX_OUTSTANDING_CMDS 255 +#define RSXX_CS_IDX_MASK 0xff + +#define RSXX_MAX_TARGETS 8 + +struct dma_tracker_list; + +/* DMA Command/Status Buffer structure */ +struct rsxx_cs_buffer { + dma_addr_t dma_addr; + void *buf; + u32 idx; +}; + +struct rsxx_dma_stats { + u32 crc_errors; + u32 hard_errors; + u32 soft_errors; + u32 writes_issued; + u32 writes_failed; + u32 reads_issued; + u32 reads_failed; + u32 reads_retried; + u32 discards_issued; + u32 discards_failed; + u32 done_rescheduled; + u32 issue_rescheduled; + u32 sw_q_depth; /* Number of DMAs on the SW queue. */ + atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ +}; + +struct rsxx_dma_ctrl { + struct rsxx_cardinfo *card; + int id; + void __iomem *regmap; + struct rsxx_cs_buffer status; + struct rsxx_cs_buffer cmd; + u16 e_cnt; + spinlock_t queue_lock; + struct list_head queue; + struct workqueue_struct *issue_wq; + struct work_struct issue_dma_work; + struct workqueue_struct *done_wq; + struct work_struct dma_done_work; + struct timer_list activity_timer; + struct dma_tracker_list *trackers; + struct rsxx_dma_stats stats; +}; + +struct rsxx_cardinfo { + struct pci_dev *dev; + unsigned int halt; + + void __iomem *regmap; + spinlock_t irq_lock; + unsigned int isr_mask; + unsigned int ier_mask; + + struct rsxx_card_cfg config; + int config_valid; + + /* Embedded CPU Communication */ + struct { + spinlock_t lock; + bool active; + struct creg_cmd *active_cmd; + struct work_struct done_work; + struct list_head queue; + unsigned int q_depth; + /* Cache the creg status to prevent ioreads */ + struct { + u32 stat; + u32 failed_cancel_timer; + u32 creg_timeout; + } creg_stats; + struct timer_list cmd_timer; + struct mutex reset_lock; + int reset; + } creg_ctrl; + + struct { + char tmp[MAX_CREG_DATA8]; + char buf[LOG_BUF_SIZE8]; /* terminated */ + int buf_len; + } log; + + struct work_struct event_work; + unsigned int state; + u64 size8; + + /* Lock the device attach/detach function */ + struct mutex dev_lock; + + /* Block Device Variables */ + bool bdev_attached; + int disk_id; + int major; + struct request_queue *queue; + struct gendisk *gendisk; + struct { + /* Used to convert a byte address to a device address. */ + u64 lower_mask; + u64 upper_shift; + u64 upper_mask; + u64 target_mask; + u64 target_shift; + } _stripe; + unsigned int dma_fault; + + int scrub_hard; + + int n_targets; + struct rsxx_dma_ctrl *ctrl; +}; + +enum rsxx_pci_regmap { + HWID = 0x00, /* Hardware Identification Register */ + SCRATCH = 0x04, /* Scratch/Debug Register */ + RESET = 0x08, /* Reset Register */ + ISR = 0x10, /* Interrupt Status Register */ + IER = 0x14, /* Interrupt Enable Register */ + IPR = 0x18, /* Interrupt Poll Register */ + CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */ + CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/ + HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */ + SW_CMD_IDX = 0x2C, /* Software Processed Command Index */ + SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */ + SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */ + HW_STATUS_CNT = 0x38, /* Hardware Status Counter */ + SW_STATUS_CNT = 0x3C, /* Deprecated */ + CREG_CMD = 0x40, /* CPU Command Register */ + CREG_ADD = 0x44, /* CPU Address Register */ + CREG_CNT = 0x48, /* CPU Count Register */ + CREG_STAT = 0x4C, /* CPU Status Register */ + CREG_DATA0 = 0x50, /* CPU Data Registers */ + CREG_DATA1 = 0x54, + CREG_DATA2 = 0x58, + CREG_DATA3 = 0x5C, + CREG_DATA4 = 0x60, + CREG_DATA5 = 0x64, + CREG_DATA6 = 0x68, + CREG_DATA7 = 0x6c, + INTR_COAL = 0x70, /* Interrupt Coalescing Register */ + HW_ERROR = 0x74, /* Card Error Register */ + PCI_DEBUG0 = 0x78, /* PCI Debug Registers */ + PCI_DEBUG1 = 0x7C, + PCI_DEBUG2 = 0x80, + PCI_DEBUG3 = 0x84, + PCI_DEBUG4 = 0x88, + PCI_DEBUG5 = 0x8C, + PCI_DEBUG6 = 0x90, + PCI_DEBUG7 = 0x94, + PCI_POWER_THROTTLE = 0x98, + PERF_CTRL = 0x9c, + PERF_TIMER_LO = 0xa0, + PERF_TIMER_HI = 0xa4, + PERF_RD512_LO = 0xa8, + PERF_RD512_HI = 0xac, + PERF_WR512_LO = 0xb0, + PERF_WR512_HI = 0xb4, +}; + +enum rsxx_intr { + CR_INTR_DMA0 = 0x00000001, + CR_INTR_CREG = 0x00000002, + CR_INTR_DMA1 = 0x00000004, + CR_INTR_EVENT = 0x00000008, + CR_INTR_DMA2 = 0x00000010, + CR_INTR_DMA3 = 0x00000020, + CR_INTR_DMA4 = 0x00000040, + CR_INTR_DMA5 = 0x00000080, + CR_INTR_DMA6 = 0x00000100, + CR_INTR_DMA7 = 0x00000200, + CR_INTR_DMA_ALL = 0x000003f5, + CR_INTR_ALL = 0xffffffff, +}; + +static inline int CR_INTR_DMA(int N) +{ + static const unsigned int _CR_INTR_DMA[] = { + CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3, + CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7 + }; + return _CR_INTR_DMA[N]; +} +enum rsxx_pci_reset { + DMA_QUEUE_RESET = 0x00000001, +}; + +enum rsxx_pci_revision { + RSXX_DISCARD_SUPPORT = 2, +}; + +enum rsxx_creg_cmd { + CREG_CMD_TAG_MASK = 0x0000FF00, + CREG_OP_WRITE = 0x000000C0, + CREG_OP_READ = 0x000000E0, +}; + +enum rsxx_creg_addr { + CREG_ADD_CARD_CMD = 0x80001000, + CREG_ADD_CARD_STATE = 0x80001004, + CREG_ADD_CARD_SIZE = 0x8000100c, + CREG_ADD_CAPABILITIES = 0x80001050, + CREG_ADD_LOG = 0x80002000, + CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CONFIG = 0xB0000000, +}; + +enum rsxx_creg_card_cmd { + CARD_CMD_STARTUP = 1, + CARD_CMD_SHUTDOWN = 2, + CARD_CMD_LOW_LEVEL_FORMAT = 3, + CARD_CMD_FPGA_RECONFIG_BR = 4, + CARD_CMD_FPGA_RECONFIG_MAIN = 5, + CARD_CMD_BACKUP = 6, + CARD_CMD_RESET = 7, + CARD_CMD_deprecated = 8, + CARD_CMD_UNINITIALIZE = 9, + CARD_CMD_DSTROY_EMERGENCY = 10, + CARD_CMD_DSTROY_NORMAL = 11, + CARD_CMD_DSTROY_EXTENDED = 12, + CARD_CMD_DSTROY_ABORT = 13, +}; + +enum rsxx_card_state { + CARD_STATE_SHUTDOWN = 0x00000001, + CARD_STATE_STARTING = 0x00000002, + CARD_STATE_FORMATTING = 0x00000004, + CARD_STATE_UNINITIALIZED = 0x00000008, + CARD_STATE_GOOD = 0x00000010, + CARD_STATE_SHUTTING_DOWN = 0x00000020, + CARD_STATE_FAULT = 0x00000040, + CARD_STATE_RD_ONLY_FAULT = 0x00000080, + CARD_STATE_DSTROYING = 0x00000100, +}; + +enum rsxx_led { + LED_DEFAULT = 0x0, + LED_IDENTIFY = 0x1, + LED_SOAK = 0x2, +}; + +enum rsxx_creg_flash_lock { + CREG_FLASH_LOCK = 1, + CREG_FLASH_UNLOCK = 2, +}; + +enum rsxx_card_capabilities { + CARD_CAP_SUBPAGE_WRITES = 0x00000080, +}; + +enum rsxx_creg_stat { + CREG_STAT_STATUS_MASK = 0x00000003, + CREG_STAT_SUCCESS = 0x1, + CREG_STAT_ERROR = 0x2, + CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */ + CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */ + CREG_STAT_TAG_MASK = 0x0000ff00, +}; + +static inline unsigned int CREG_DATA(int N) +{ + return CREG_DATA0 + (N << 2); +} + +/*----------------- Convenient Log Wrappers -------------------*/ +#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev) + +/***** config.c *****/ +int rsxx_load_config(struct rsxx_cardinfo *card); + +/***** core.c *****/ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); + +/***** dev.c *****/ +int rsxx_attach_dev(struct rsxx_cardinfo *card); +void rsxx_detach_dev(struct rsxx_cardinfo *card); +int rsxx_setup_dev(struct rsxx_cardinfo *card); +void rsxx_destroy_dev(struct rsxx_cardinfo *card); +int rsxx_dev_init(void); +void rsxx_dev_cleanup(void); + +/***** dma.c ****/ +typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int status); +int rsxx_dma_setup(struct rsxx_cardinfo *card); +void rsxx_dma_destroy(struct rsxx_cardinfo *card); +int rsxx_dma_init(void); +void rsxx_dma_cleanup(void); +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data); + +/***** cregs.c *****/ +int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_read_hw_log(struct rsxx_cardinfo *card); +int rsxx_get_card_state(struct rsxx_cardinfo *card, + unsigned int *state); +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8); +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets); +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities); +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd); +int rsxx_creg_setup(struct rsxx_cardinfo *card); +void rsxx_creg_destroy(struct rsxx_cardinfo *card); +int rsxx_creg_init(void); +void rsxx_creg_cleanup(void); + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read); + + + +#endif /* __DRIVERS_BLOCK_RSXX_H__ */ diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 57763c54363a..758f2ac878cf 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = { static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = macio_get_drvdata(mdev); - struct swim3 __iomem *sw = fs->swim3; + struct swim3 __iomem *sw; if (!fs) return; + + sw = fs->swim3; + if (mb_state != MB_FD) return; diff --git a/drivers/block/xd.c b/drivers/block/xd.c deleted file mode 100644 index ff540520bada..000000000000 --- a/drivers/block/xd.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * This file contains the driver for an XT hard disk controller - * (at least the DTC 5150X) for Linux. - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, - * kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and - * Wim Van Dorst. - * - * Revised: 04/04/94 by Risto Kankkunen - * Moved the detection code from xd_init() to xd_geninit() as it needed - * interrupts enabled and Linus didn't want to enable them in that first - * phase. xd_geninit() is the place to do these kinds of things anyway, - * he says. - * - * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu - * - * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl - * Fixed some problems with disk initialization and module initiation. - * Added support for manual geometry setting (except Seagate controllers) - * in form: - * xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>] - * Recovered DMA access. Abridged messages. Added support for DTC5051CX, - * WD1002-27X & XEBEC controllers. Driver uses now some jumper settings. - * Extended ioctl() support. - * - * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/wait.h> -#include <linux/blkdev.h> -#include <linux/mutex.h> -#include <linux/blkpg.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/gfp.h> - -#include <asm/uaccess.h> -#include <asm/dma.h> - -#include "xd.h" - -static DEFINE_MUTEX(xd_mutex); -static void __init do_xd_setup (int *integers); -#ifdef MODULE -static int xd[5] = { -1,-1,-1,-1, }; -#endif - -#define XD_DONT_USE_DMA 0 /* Initial value. may be overriden using - "nodma" module option */ -#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */ - -/* Above may need to be increased if a problem with the 2nd drive detection - (ST11M controller) or resetting a controller (WD) appears */ - -static XD_INFO xd_info[XD_MAXDRIVES]; - -/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS - signature and details to the following list of signatures. A BIOS signature is a string embedded into the first - few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG - command. Run DEBUG, and then you can examine your BIOS signature with: - - d xxxx:0000 - - where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should - be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters - in the table are, in order: - - offset ; this is the offset (in bytes) from the start of your ROM where the signature starts - signature ; this is the actual text of the signature - xd_?_init_controller ; this is the controller init routine used by your controller - xd_?_init_drive ; this is the drive init routine used by your controller - - The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is - made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your - best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and - may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>. - - NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver - should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */ - -#include <asm/page.h> -#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) -#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) -static char *xd_dma_buffer; - -static XD_SIGNATURE xd_sigs[] __initdata = { - { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x000B,"CRD18A Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */ - { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */ - { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */ - { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */ - { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */ - { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */ - { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" }, - { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */ -}; - -static unsigned int xd_bases[] __initdata = -{ - 0xC8000, 0xCA000, 0xCC000, - 0xCE000, 0xD0000, 0xD2000, - 0xD4000, 0xD6000, 0xD8000, - 0xDA000, 0xDC000, 0xDE000, - 0xE0000 -}; - -static DEFINE_SPINLOCK(xd_lock); - -static struct gendisk *xd_gendisk[2]; - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); - -static const struct block_device_operations xd_fops = { - .owner = THIS_MODULE, - .ioctl = xd_ioctl, - .getgeo = xd_getgeo, -}; -static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); -static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; -static u_char xd_override __initdata = 0, xd_type __initdata = 0; -static u_short xd_iobase = 0x320; -static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, }; - -static volatile int xdc_busy; -static struct timer_list xd_watchdog_int; - -static volatile u_char xd_error; -static bool nodma = XD_DONT_USE_DMA; - -static struct request_queue *xd_queue; - -/* xd_init: register the block device number and set up pointer tables */ -static int __init xd_init(void) -{ - u_char i,controller; - unsigned int address; - int err; - -#ifdef MODULE - { - u_char count = 0; - for (i = 4; i > 0; i--) - if (((xd[i] = xd[i-1]) >= 0) && !count) - count = i; - if ((xd[0] = count)) - do_xd_setup(xd); - } -#endif - - init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog; - - err = -EBUSY; - if (register_blkdev(XT_DISK_MAJOR, "xd")) - goto out1; - - err = -ENOMEM; - xd_queue = blk_init_queue(do_xd_request, &xd_lock); - if (!xd_queue) - goto out1a; - - if (xd_detect(&controller,&address)) { - - printk("Detected a%s controller (type %d) at address %06x\n", - xd_sigs[controller].name,controller,address); - if (!request_region(xd_iobase,4,"xd")) { - printk("xd: Ports at 0x%x are not available\n", - xd_iobase); - goto out2; - } - if (controller) - xd_sigs[controller].init_controller(address); - xd_drives = xd_initdrives(xd_sigs[controller].init_drive); - - printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n", - xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma); - } - - /* - * With the drive detected, xd_maxsectors should now be known. - * If xd_maxsectors is 0, nothing was detected and we fall through - * to return -ENODEV - */ - if (!xd_dma_buffer && xd_maxsectors) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - printk(KERN_ERR "xd: Out of memory.\n"); - goto out3; - } - } - - err = -ENODEV; - if (!xd_drives) - goto out3; - - for (i = 0; i < xd_drives; i++) { - XD_INFO *p = &xd_info[i]; - struct gendisk *disk = alloc_disk(64); - if (!disk) - goto Enomem; - p->unit = i; - disk->major = XT_DISK_MAJOR; - disk->first_minor = i<<6; - sprintf(disk->disk_name, "xd%c", i+'a'); - disk->fops = &xd_fops; - disk->private_data = p; - disk->queue = xd_queue; - set_capacity(disk, p->heads * p->cylinders * p->sectors); - printk(" %s: CHS=%d/%d/%d\n", disk->disk_name, - p->cylinders, p->heads, p->sectors); - xd_gendisk[i] = disk; - } - - err = -EBUSY; - if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) { - printk("xd: unable to get IRQ%d\n",xd_irq); - goto out4; - } - - if (request_dma(xd_dma,"xd")) { - printk("xd: unable to get DMA%d\n",xd_dma); - goto out5; - } - - /* xd_maxsectors depends on controller - so set after detection */ - blk_queue_max_hw_sectors(xd_queue, xd_maxsectors); - - for (i = 0; i < xd_drives; i++) - add_disk(xd_gendisk[i]); - - return 0; - -out5: - free_irq(xd_irq, NULL); -out4: - for (i = 0; i < xd_drives; i++) - put_disk(xd_gendisk[i]); -out3: - if (xd_maxsectors) - release_region(xd_iobase,4); - - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); -out2: - blk_cleanup_queue(xd_queue); -out1a: - unregister_blkdev(XT_DISK_MAJOR, "xd"); -out1: - return err; -Enomem: - err = -ENOMEM; - while (i--) - put_disk(xd_gendisk[i]); - goto out3; -} - -/* xd_detect: scan the possible BIOS ROM locations for the signature strings */ -static u_char __init xd_detect (u_char *controller, unsigned int *address) -{ - int i, j; - - if (xd_override) - { - *controller = xd_type; - *address = 0; - return(1); - } - - for (i = 0; i < ARRAY_SIZE(xd_bases); i++) { - void __iomem *p = ioremap(xd_bases[i], 0x2000); - if (!p) - continue; - for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) { - const char *s = xd_sigs[j].string; - if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { - *controller = j; - xd_type = j; - *address = xd_bases[i]; - iounmap(p); - return 1; - } - } - iounmap(p); - } - return 0; -} - -/* do_xd_request: handle an incoming request */ -static void do_xd_request (struct request_queue * q) -{ - struct request *req; - - if (xdc_busy) - return; - - req = blk_fetch_request(q); - while (req) { - unsigned block = blk_rq_pos(req); - unsigned count = blk_rq_cur_sectors(req); - XD_INFO *disk = req->rq_disk->private_data; - int res = -EIO; - int retry; - - if (req->cmd_type != REQ_TYPE_FS) - goto done; - if (block + count > get_capacity(req->rq_disk)) - goto done; - for (retry = 0; (retry < XD_RETRIES) && !res; retry++) - res = xd_readwrite(rq_data_dir(req), disk, req->buffer, - block, count); - done: - /* wrap up, 0 = success, -errno = fail */ - if (!__blk_end_request_cur(req, res)) - req = blk_fetch_request(q); - } -} - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - XD_INFO *p = bdev->bd_disk->private_data; - - geo->heads = p->heads; - geo->sectors = p->sectors; - geo->cylinders = p->cylinders; - return 0; -} - -/* xd_ioctl: handle device ioctl's */ -static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg) -{ - switch (cmd) { - case HDIO_SET_DMA: - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (xdc_busy) return -EBUSY; - nodma = !arg; - if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); - xd_dma_buffer = NULL; - } else if (!nodma && !xd_dma_buffer) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - nodma = XD_DONT_USE_DMA; - return -ENOMEM; - } - } - return 0; - case HDIO_GET_DMA: - return put_user(!nodma, (long __user *) arg); - case HDIO_GET_MULTCOUNT: - return put_user(xd_maxsectors, (long __user *) arg); - default: - return -EINVAL; - } -} - -static int xd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - mutex_lock(&xd_mutex); - ret = xd_locked_ioctl(bdev, mode, cmd, param); - mutex_unlock(&xd_mutex); - - return ret; -} - -/* xd_readwrite: handle a read/write request */ -static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count) -{ - int drive = p->unit; - u_char cmdblk[6],sense[4]; - u_short track,cylinder; - u_char head,sector,control,mode = PIO_MODE,temp; - char **real_buffer; - register int i; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count); -#endif /* DEBUG_READWRITE */ - - spin_unlock_irq(&xd_lock); - - control = p->control; - if (!xd_dma_buffer) - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - while (count) { - temp = count < xd_maxsectors ? count : xd_maxsectors; - - track = block / p->sectors; - head = track % p->heads; - cylinder = track / p->heads; - sector = block % p->sectors; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp); -#endif /* DEBUG_READWRITE */ - - if (xd_dma_buffer) { - mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200); - real_buffer = &xd_dma_buffer; - for (i=0; i < (temp * 0x200); i++) - xd_dma_buffer[i] = buffer[i]; - } - else - real_buffer = &buffer; - - xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control); - - switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) { - case 1: - printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); - xd_recalibrate(drive); - spin_lock_irq(&xd_lock); - return -EIO; - case 2: - if (sense[0] & 0x30) { - printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); - switch ((sense[0] & 0x30) >> 4) { - case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F); - break; - case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F); - break; - case 2: printk("command error, code = 0x%X",sense[0] & 0x0F); - break; - case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F); - break; - } - } - if (sense[0] & 0x80) - printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F); - /* reported drive number = (sense[1] & 0xE0) >> 5 */ - else - printk(" - no valid disk address\n"); - spin_lock_irq(&xd_lock); - return -EIO; - } - if (xd_dma_buffer) - for (i=0; i < (temp * 0x200); i++) - buffer[i] = xd_dma_buffer[i]; - - count -= temp, buffer += temp * 0x200, block += temp; - } - spin_lock_irq(&xd_lock); - return 0; -} - -/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ -static void xd_recalibrate (u_char drive) -{ - u_char cmdblk[6]; - - xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8)) - printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive); -} - -/* xd_interrupt_handler: interrupt service routine */ -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id) -{ - if (inb(XD_STATUS) & STAT_INTERRUPT) { /* check if it was our device */ -#ifdef DEBUG_OTHER - printk("xd_interrupt_handler: interrupt detected\n"); -#endif /* DEBUG_OTHER */ - outb(0,XD_CONTROL); /* acknowledge interrupt */ - wake_up(&xd_wait_int); /* and wake up sleeping processes */ - return IRQ_HANDLED; - } - else - printk("xd: unexpected interrupt\n"); - return IRQ_NONE; -} - -/* xd_setup_dma: set up the DMA controller for a data transfer */ -static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count) -{ - unsigned long f; - - if (nodma) - return (PIO_MODE); - if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) { -#ifdef DEBUG_OTHER - printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n"); -#endif /* DEBUG_OTHER */ - return (PIO_MODE); - } - - f=claim_dma_lock(); - disable_dma(xd_dma); - clear_dma_ff(xd_dma); - set_dma_mode(xd_dma,mode); - set_dma_addr(xd_dma, (unsigned long) buffer); - set_dma_count(xd_dma,count); - - release_dma_lock(f); - - return (DMA_MODE); /* use DMA and INT */ -} - -/* xd_build: put stuff into an array in a format suitable for the controller */ -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control) -{ - cmdblk[0] = command; - cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F); - cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F); - cmdblk[3] = cylinder & 0xFF; - cmdblk[4] = count; - cmdblk[5] = control; - - return (cmdblk); -} - -static void xd_watchdog (unsigned long unused) -{ - xd_error = 1; - wake_up(&xd_wait_int); -} - -/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */ -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout) -{ - u_long expiry = jiffies + timeout; - int success; - - xdc_busy = 1; - while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) - schedule_timeout_uninterruptible(1); - xdc_busy = 0; - return (success); -} - -static inline u_int xd_wait_for_IRQ (void) -{ - unsigned long flags; - xd_watchdog_int.expires = jiffies + 8 * HZ; - add_timer(&xd_watchdog_int); - - flags=claim_dma_lock(); - enable_dma(xd_dma); - release_dma_lock(flags); - - sleep_on(&xd_wait_int); - del_timer(&xd_watchdog_int); - xdc_busy = 0; - - flags=claim_dma_lock(); - disable_dma(xd_dma); - release_dma_lock(flags); - - if (xd_error) { - printk("xd: missed IRQ - command aborted\n"); - xd_error = 0; - return (1); - } - return (0); -} - -/* xd_command: handle all data transfers necessary for a single command */ -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout) -{ - u_char cmdblk[6],csb,complete = 0; - -#ifdef DEBUG_COMMAND - printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense); -#endif /* DEBUG_COMMAND */ - - outb(0,XD_SELECT); - outb(mode,XD_CONTROL); - - if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout)) - return (1); - - while (!complete) { - if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout)) - return (1); - - switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) { - case 0: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - outb(outdata ? *outdata++ : 0,XD_DATA); - break; - case STAT_INPUT: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - if (indata) - *indata++ = inb(XD_DATA); - else - inb(XD_DATA); - break; - case STAT_COMMAND: - outb(command ? *command++ : 0,XD_DATA); - break; - case STAT_COMMAND | STAT_INPUT: - complete = 1; - break; - } - } - csb = inb(XD_DATA); - - if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout)) /* wait until deselected */ - return (1); - - if (csb & CSB_ERROR) { /* read sense data if error */ - xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0); - if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT)) - printk("xd: warning! sense command failed!\n"); - } - -#ifdef DEBUG_COMMAND - printk("xd_command: completed with csb = 0x%X\n",csb); -#endif /* DEBUG_COMMAND */ - - return (csb & CSB_ERROR); -} - -static u_char __init xd_initdrives (void (*init_drive)(u_char drive)) -{ - u_char cmdblk[6],i,count = 0; - - for (i = 0; i < XD_MAXDRIVES; i++) { - xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) { - msleep_interruptible(XD_INIT_DISK_DELAY); - - init_drive(count); - count++; - - msleep_interruptible(XD_INIT_DISK_DELAY); - } - } - return (count); -} - -static void __init xd_manual_geo_set (u_char drive) -{ - xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]); - xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]); - xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]); -} - -static void __init xd_dtc_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; - case 0xD0000: /*5150CX*/ - case 0xD8000: break; /*5150CX & 5150XL*/ - default: printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* my card seems to have trouble doing multi-block transfers? */ - - outb(0,XD_RESET); /* reset the controller */ -} - - -static void __init xd_dtc5150cx_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][4] = { - {0x200,8,0x200,0x100}, - {0x267,2,0x267,0x267}, - {0x264,4,0x264,0x80}, - {0x132,4,0x132,0x0}, - {0x132,2,0x80, 0x132}, - {0x177,8,0x177,0x0}, - {0x132,8,0x84, 0x0}, - {}, /* not used */ - {0x132,6,0x80, 0x100}, - {0x200,6,0x100,0x100}, - {0x264,2,0x264,0x80}, - {0x280,4,0x280,0x100}, - {0x2B9,3,0x2B9,0x2B9}, - {0x2B9,5,0x2B9,0x2B9}, - {0x280,6,0x280,0x100}, - {0x132,4,0x132,0x0}}; - u_char n; - - n = inb(XD_JUMPER); - n = (drive ? n : (n >> 2)) & 0x33; - n = (n | (n >> 2)) & 0x0F; - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else - if (n != 7) { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - else { - printk("xd%c: undetermined drive geometry\n",'a'+drive); - return; - } - xd_info[drive].control = 5; /* control byte */ - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -static void __init xd_dtc_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[64]; - - xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x0A]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf))[0x04]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05]; /* reduced write */ - xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */ - xd_info[drive].ecc = buf[0x0F]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = 0; /* control byte */ - - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]); - xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive); - } - else - printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive); -} - -static void __init xd_wd_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; break; - case 0xCC000: xd_iobase = 0x328; break; - case 0xCE000: xd_iobase = 0x32C; break; - case 0xD0000: xd_iobase = 0x328; break; /* ? */ - case 0xD8000: xd_iobase = 0x32C; break; /* ? */ - default: printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* this one doesn't wrap properly either... */ - - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_wd_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS may be disabled */ - static u_short geometry_table[][4] = { - {0x264,4,0x1C2,0x1C2}, /* common part */ - {0x132,4,0x099,0x0}, - {0x267,2,0x1C2,0x1C2}, - {0x267,4,0x1C2,0x1C2}, - - {0x334,6,0x335,0x335}, /* 1004 series RLL */ - {0x30E,4,0x30F,0x3DC}, - {0x30E,2,0x30F,0x30F}, - {0x267,4,0x268,0x268}, - - {0x3D5,5,0x3D6,0x3D6}, /* 1002 series RLL */ - {0x3DB,7,0x3DC,0x3DC}, - {0x264,4,0x265,0x265}, - {0x267,4,0x268,0x268}}; - - u_char cmdblk[6],buf[0x200]; - u_char n = 0,rll,jumper_state,use_jumper_geo; - u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6'); - - jumper_state = ~(inb(0x322)); - if (jumper_state & 0x40) - xd_irq = 9; - rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0; - xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x1AF]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf))[0xD8]; /* reduced write */ - xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA]; /* write precomp */ - xd_info[drive].ecc = buf[0x1B4]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = buf[0x1B5]; /* control byte */ - use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders); - if (xd_geo[3*drive]) { - xd_manual_geo_set(drive); - xd_info[drive].control = rll ? 7 : 5; - } - else if (use_jumper_geo) { - n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll; - xd_info[drive].cylinders = geometry_table[n][0]; - xd_info[drive].heads = (u_char)(geometry_table[n][1]); - xd_info[drive].control = rll ? 7 : 5; -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; - xd_info[drive].wprecomp = geometry_table[n][3]; - xd_info[drive].ecc = 0x0B; -#endif /* 0 */ - } - if (!wd_1002) { - if (use_jumper_geo) - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - geometry_table[n][2],geometry_table[n][3],0x0B); - else - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]); - } - /* 1002 based RLL controller requests converted addressing, but reports physical - (physical 26 sec., logical 17 sec.) - 1004 based ???? */ - if (rll & wd_1002) { - if ((xd_info[drive].cylinders *= 26, - xd_info[drive].cylinders /= 17) > 1023) - xd_info[drive].cylinders = 1023; /* 1024 ? */ -#if 0 - xd_info[drive].rwrite *= 26; - xd_info[drive].rwrite /= 17; - xd_info[drive].wprecomp *= 26 - xd_info[drive].wprecomp /= 17; -#endif /* 0 */ - } - } - else - printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive); - -} - -static void __init xd_seagate_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_seagate_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[0x200]; - - xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x04]; /* heads */ - xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03]; /* cylinders */ - xd_info[drive].sectors = buf[0x05]; /* sectors */ - xd_info[drive].control = 0; /* control byte */ - } - else - printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive); -} - -/* Omti support courtesy Dirk Melchers */ -static void __init xd_omti_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_omti_init_drive (u_char drive) -{ - /* gets infos from drive */ - xd_override_init_drive(drive); - - /* set other parameters, Hardcoded, not that nice :-) */ - xd_info[drive].control = 2; -} - -/* Xebec support (AK) */ -static void __init xd_xebec_init_controller (unsigned int address) -{ -/* iobase may be set manually in range 0x300 - 0x33C - irq may be set manually to 2(9),3,4,5,6,7 - dma may be set manually to 1,2,3 - (How to detect them ???) -BIOS address may be set manually in range 0x0 - 0xF8000 -If you need non-standard settings use the xd=... command */ - - switch (address) { - case 0x00000: - case 0xC8000: /* initially: xd_iobase==0x320 */ - case 0xD0000: - case 0xD2000: - case 0xD4000: - case 0xD6000: - case 0xD8000: - case 0xDA000: - case 0xDC000: - case 0xDE000: - case 0xE0000: break; - default: printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x01; - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_xebec_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][5] = { - {0x132,4,0x080,0x080,0x7}, - {0x132,4,0x080,0x080,0x17}, - {0x264,2,0x100,0x100,0x7}, - {0x264,2,0x100,0x100,0x17}, - {0x132,8,0x080,0x080,0x7}, - {0x132,8,0x080,0x080,0x17}, - {0x264,4,0x100,0x100,0x6}, - {0x264,4,0x100,0x100,0x17}, - {0x2BC,5,0x2BC,0x12C,0x6}, - {0x3A5,4,0x3A5,0x3A5,0x7}, - {0x26C,6,0x26C,0x26C,0x7}, - {0x200,8,0x200,0x100,0x17}, - {0x400,5,0x400,0x400,0x7}, - {0x400,6,0x400,0x400,0x7}, - {0x264,8,0x264,0x200,0x17}, - {0x33E,7,0x33E,0x200,0x7}}; - u_char n; - - n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry - is assumed for BOTH drives */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - xd_info[drive].control = geometry_table[n][4]; /* control byte */ - xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads - etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */ -static void __init xd_override_init_drive (u_char drive) -{ - u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 }; - u_char cmdblk[6],i; - - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - for (i = 0; i < 3; i++) { - while (min[i] != max[i] - 1) { - test[i] = (min[i] + max[i]) / 2; - xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - min[i] = test[i]; - else - max[i] = test[i]; - } - test[i] = min[i]; - } - xd_info[drive].heads = (u_char) min[0] + 1; - xd_info[drive].cylinders = (u_short) min[1] + 1; - xd_info[drive].sectors = (u_char) min[2] + 1; - } - xd_info[drive].control = 0; -} - -/* xd_setup: initialise controller from command line parameters */ -static void __init do_xd_setup (int *integers) -{ - switch (integers[0]) { - case 4: if (integers[4] < 0) - nodma = 1; - else if (integers[4] < 8) - xd_dma = integers[4]; - case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC)) - xd_iobase = integers[3]; - case 2: if ((integers[2] > 0) && (integers[2] < 16)) - xd_irq = integers[2]; - case 1: xd_override = 1; - if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs))) - xd_type = integers[1]; - case 0: break; - default:printk("xd: too many parameters for xd\n"); - } - xd_maxsectors = 0x01; -} - -/* xd_setparam: set the drive characteristics */ -static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc) -{ - u_char cmdblk[14]; - - xd_build(cmdblk,command,drive,0,0,0,0,0); - cmdblk[6] = (u_char) (cylinders >> 8) & 0x03; - cmdblk[7] = (u_char) (cylinders & 0xFF); - cmdblk[8] = heads & 0x1F; - cmdblk[9] = (u_char) (rwrite >> 8) & 0x03; - cmdblk[10] = (u_char) (rwrite & 0xFF); - cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03; - cmdblk[12] = (u_char) (wprecomp & 0xFF); - cmdblk[13] = ecc; - - /* Some controllers require geometry info as data, not command */ - - if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2)) - printk("xd: error setting characteristics for xd%c\n", 'a'+drive); -} - - -#ifdef MODULE - -module_param_array(xd, int, NULL, 0); -module_param_array(xd_geo, int, NULL, 0); -module_param(nodma, bool, 0); - -MODULE_LICENSE("GPL"); - -void cleanup_module(void) -{ - int i; - unregister_blkdev(XT_DISK_MAJOR, "xd"); - for (i = 0; i < xd_drives; i++) { - del_gendisk(xd_gendisk[i]); - put_disk(xd_gendisk[i]); - } - blk_cleanup_queue(xd_queue); - release_region(xd_iobase,4); - if (xd_drives) { - free_irq(xd_irq, NULL); - free_dma(xd_dma); - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); - } -} -#else - -static int __init xd_setup (char *str) -{ - int ints[5]; - get_options (str, ARRAY_SIZE (ints), ints); - do_xd_setup (ints); - return 1; -} - -/* xd_manual_geo_init: initialise drive geometry from command line parameters - (used only for WD drives) */ -static int __init xd_manual_geo_init (char *str) -{ - int i, integers[1 + 3*XD_MAXDRIVES]; - - get_options (str, ARRAY_SIZE (integers), integers); - if (integers[0]%3 != 0) { - printk("xd: incorrect number of parameters for xd_geo\n"); - return 1; - } - for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++) - xd_geo[i] = integers[i+1]; - return 1; -} - -__setup ("xd=", xd_setup); -__setup ("xd_geo=", xd_manual_geo_init); - -#endif /* MODULE */ - -module_init(xd_init); -MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR); diff --git a/drivers/block/xd.h b/drivers/block/xd.h deleted file mode 100644 index 37cacef16e93..000000000000 --- a/drivers/block/xd.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _LINUX_XD_H -#define _LINUX_XD_H - -/* - * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X). - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst. - */ - -#include <linux/interrupt.h> - -/* XT hard disk controller registers */ -#define XD_DATA (xd_iobase + 0x00) /* data RW register */ -#define XD_RESET (xd_iobase + 0x01) /* reset WO register */ -#define XD_STATUS (xd_iobase + 0x01) /* status RO register */ -#define XD_SELECT (xd_iobase + 0x02) /* select WO register */ -#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */ -#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */ -#define XD_RESERVED (xd_iobase + 0x03) /* reserved */ - -/* XT hard disk controller commands (incomplete list) */ -#define CMD_TESTREADY 0x00 /* test drive ready */ -#define CMD_RECALIBRATE 0x01 /* recalibrate drive */ -#define CMD_SENSE 0x03 /* request sense */ -#define CMD_FORMATDRV 0x04 /* format drive */ -#define CMD_VERIFY 0x05 /* read verify */ -#define CMD_FORMATTRK 0x06 /* format track */ -#define CMD_FORMATBAD 0x07 /* format bad track */ -#define CMD_READ 0x08 /* read */ -#define CMD_WRITE 0x0A /* write */ -#define CMD_SEEK 0x0B /* seek */ - -/* Controller specific commands */ -#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */ -#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */ -#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */ -#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */ -#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */ -#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */ -#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */ -#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */ -#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */ -#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */ -#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */ -#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */ - -/* Bits for command status byte */ -#define CSB_ERROR 0x02 /* error */ -#define CSB_LUN 0x20 /* logical Unit Number */ - -/* XT hard disk controller status bits */ -#define STAT_READY 0x01 /* controller is ready */ -#define STAT_INPUT 0x02 /* data flowing from controller to host */ -#define STAT_COMMAND 0x04 /* controller in command phase */ -#define STAT_SELECT 0x08 /* controller is selected */ -#define STAT_REQUEST 0x10 /* controller requesting data */ -#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */ - -/* XT hard disk controller control bits */ -#define PIO_MODE 0x00 /* control bits to set for PIO */ -#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */ - -#define XD_MAXDRIVES 2 /* maximum 2 drives */ -#define XD_TIMEOUT HZ /* 1 second timeout */ -#define XD_RETRIES 4 /* maximum 4 retries */ - -#undef DEBUG /* define for debugging output */ - -#ifdef DEBUG - #define DEBUG_STARTUP /* debug driver initialisation */ - #define DEBUG_OVERRIDE /* debug override geometry detection */ - #define DEBUG_READWRITE /* debug each read/write command */ - #define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */ - #define DEBUG_COMMAND /* debug each controller command */ -#endif /* DEBUG */ - -/* this structure defines the XT drives and their types */ -typedef struct { - u_char heads; - u_short cylinders; - u_char sectors; - u_char control; - int unit; -} XD_INFO; - -/* this structure defines a ROM BIOS signature */ -typedef struct { - unsigned int offset; - const char *string; - void (*init_controller)(unsigned int address); - void (*init_drive)(u_char drive); - const char *name; -} XD_SIGNATURE; - -#ifndef MODULE -static int xd_manual_geo_init (char *command); -#endif /* MODULE */ -static u_char xd_detect (u_char *controller, unsigned int *address); -static u_char xd_initdrives (void (*init_drive)(u_char drive)); - -static void do_xd_request (struct request_queue * q); -static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg); -static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count); -static void xd_recalibrate (u_char drive); - -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id); -static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count); -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control); -static void xd_watchdog (unsigned long unused); -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout); -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout); - -/* card specific setup and geometry gathering code */ -static void xd_dtc_init_controller (unsigned int address); -static void xd_dtc5150cx_init_drive (u_char drive); -static void xd_dtc_init_drive (u_char drive); -static void xd_wd_init_controller (unsigned int address); -static void xd_wd_init_drive (u_char drive); -static void xd_seagate_init_controller (unsigned int address); -static void xd_seagate_init_drive (u_char drive); -static void xd_omti_init_controller (unsigned int address); -static void xd_omti_init_drive (u_char drive); -static void xd_xebec_init_controller (unsigned int address); -static void xd_xebec_init_drive (u_char drive); -static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc); -static void xd_override_init_drive (u_char drive); - -#endif /* _LINUX_XD_H */ diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5ac841ff6cc7..de1f319f7bd7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,6 +46,7 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/balloon.h> #include "common.h" /* @@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); + free_xenballooned_pages(segs_to_unmap, pages); segs_to_unmap = 0; } @@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req, GFP_KERNEL); if (!persistent_gnt) return -ENOMEM; - persistent_gnt->page = alloc_page(GFP_KERNEL); - if (!persistent_gnt->page) { + if (alloc_xenballooned_pages(1, &persistent_gnt->page, + false)) { kfree(persistent_gnt); return -ENOMEM; } @@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 63980722db41..5e237f630c47 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->blkif = NULL; } + kfree(be->mode); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch, = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; int cdrom = 0; + unsigned long handle; char *device_type; DPRINTK(""); @@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch, return; } - if ((be->major || be->minor) && - ((be->major != major) || (be->minor != minor))) { - pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", - be->major, be->minor, major, minor); + if (be->major | be->minor) { + if (be->major != major || be->minor != minor) + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch, kfree(device_type); } - if (be->major == 0 && be->minor == 0) { - /* Front end dir is a number, which is used as the handle. */ - - char *p = strrchr(dev->otherend, '/') + 1; - long handle; - err = strict_strtoul(p, 0, &handle); - if (err) - return; + /* Front end dir is a number, which is used as the handle. */ + err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); + if (err) + return; - be->major = major; - be->minor = minor; + be->major = major; + be->minor = minor; - err = xen_vbd_create(be->blkif, handle, major, minor, - (NULL == strchr(be->mode, 'w')), cdrom); - if (err) { - be->major = 0; - be->minor = 0; - xenbus_dev_fatal(dev, err, "creating vbd structure"); - return; - } + err = xen_vbd_create(be->blkif, handle, major, minor, + !strchr(be->mode, 'w'), cdrom); + if (err) + xenbus_dev_fatal(dev, err, "creating vbd structure"); + else { err = xenvbd_sysfs_addif(dev); if (err) { xen_vbd_free(&be->blkif->vbd); - be->major = 0; - be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); - return; } + } + if (err) { + kfree(be->mode); + be->mode = NULL; + be->major = 0; + be->minor = 0; + } else { /* We're potentially connected now */ xen_update_blkif_status(be->blkif); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 11043c18ac5a..c3dae2e0f290 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; - struct grant *persistent_gnt; + struct grant *persistent_gnt, *tmp; struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ @@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { + persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); + while (persistent_gnt) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - kfree(persistent_gnt); + tmp = persistent_gnt; + n = persistent_gnt->node.next; + if (n) + persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); + else + persistent_gnt = NULL; + kfree(tmp); } info->persistent_gnts_c = 0; } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 1c7fdcd22a98..0ac9b45a585e 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -1208,6 +1208,16 @@ static int smi_num; /* Used to sequence the SMIs */ #define DEFAULT_REGSPACING 1 #define DEFAULT_REGSIZE 1 +#ifdef CONFIG_ACPI +static bool si_tryacpi = 1; +#endif +#ifdef CONFIG_DMI +static bool si_trydmi = 1; +#endif +static bool si_tryplatform = 1; +#ifdef CONFIG_PCI +static bool si_trypci = 1; +#endif static bool si_trydefaults = 1; static char *si_type[SI_MAX_PARMS]; #define MAX_SI_TYPE_STR 30 @@ -1238,6 +1248,25 @@ MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See" " Documentation/IPMI.txt in the kernel sources for the" " gory details."); +#ifdef CONFIG_ACPI +module_param_named(tryacpi, si_tryacpi, bool, 0); +MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the" + " default scan of the interfaces identified via ACPI"); +#endif +#ifdef CONFIG_DMI +module_param_named(trydmi, si_trydmi, bool, 0); +MODULE_PARM_DESC(trydmi, "Setting this to zero will disable the" + " default scan of the interfaces identified via DMI"); +#endif +module_param_named(tryplatform, si_tryplatform, bool, 0); +MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the" + " default scan of the interfaces identified via platform" + " interfaces like openfirmware"); +#ifdef CONFIG_PCI +module_param_named(trypci, si_trypci, bool, 0); +MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the" + " default scan of the interfaces identified via pci"); +#endif module_param_named(trydefaults, si_trydefaults, bool, 0); MODULE_PARM_DESC(trydefaults, "Setting this to 'false' will disable the" " default scan of the KCS and SMIC interface at the standard" @@ -3371,13 +3400,15 @@ static int init_ipmi_si(void) return 0; initialized = 1; - rv = platform_driver_register(&ipmi_driver); - if (rv) { - printk(KERN_ERR PFX "Unable to register driver: %d\n", rv); - return rv; + if (si_tryplatform) { + rv = platform_driver_register(&ipmi_driver); + if (rv) { + printk(KERN_ERR PFX "Unable to register " + "driver: %d\n", rv); + return rv; + } } - /* Parse out the si_type string into its components. */ str = si_type_str; if (*str != '\0') { @@ -3400,24 +3431,31 @@ static int init_ipmi_si(void) return 0; #ifdef CONFIG_PCI - rv = pci_register_driver(&ipmi_pci_driver); - if (rv) - printk(KERN_ERR PFX "Unable to register PCI driver: %d\n", rv); - else - pci_registered = 1; + if (si_trypci) { + rv = pci_register_driver(&ipmi_pci_driver); + if (rv) + printk(KERN_ERR PFX "Unable to register " + "PCI driver: %d\n", rv); + else + pci_registered = 1; + } #endif #ifdef CONFIG_ACPI - pnp_register_driver(&ipmi_pnp_driver); - pnp_registered = 1; + if (si_tryacpi) { + pnp_register_driver(&ipmi_pnp_driver); + pnp_registered = 1; + } #endif #ifdef CONFIG_DMI - dmi_find_bmc(); + if (si_trydmi) + dmi_find_bmc(); #endif #ifdef CONFIG_ACPI - spmi_find_bmc(); + if (si_tryacpi) + spmi_find_bmc(); #endif /* We prefer devices with interrupts, but in the case of a machine diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 522136d40843..190d4423653f 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -183,19 +183,12 @@ static const struct file_operations misc_fops = { int misc_register(struct miscdevice * misc) { - struct miscdevice *c; dev_t dev; int err = 0; INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); - list_for_each_entry(c, &misc_list, list) { - if (c->minor == misc->minor) { - mutex_unlock(&misc_mtx); - return -EBUSY; - } - } if (misc->minor == MISC_DYNAMIC_MINOR) { int i = find_first_zero_bit(misc_minors, DYNAMIC_MINORS); @@ -205,6 +198,15 @@ int misc_register(struct miscdevice * misc) } misc->minor = DYNAMIC_MINORS - i - 1; set_bit(i, misc_minors); + } else { + struct miscdevice *c; + + list_for_each_entry(c, &misc_list, list) { + if (c->minor == misc->minor) { + mutex_unlock(&misc_mtx); + return -EBUSY; + } + } } dev = MKDEV(MISC_MAJOR, misc->minor); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index fabbfe1a9253..ed87b2405806 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -52,31 +52,29 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk *c, int level) { struct clk *child; - struct hlist_node *tmp; if (!c) return; clk_summary_show_one(s, c, level); - hlist_for_each_entry(child, tmp, &c->children, child_node) + hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); } static int clk_summary_show(struct seq_file *s, void *data) { struct clk *c; - struct hlist_node *tmp; seq_printf(s, " clock enable_cnt prepare_cnt rate\n"); seq_printf(s, "---------------------------------------------------------------------\n"); mutex_lock(&prepare_lock); - hlist_for_each_entry(c, tmp, &clk_root_list, child_node) + hlist_for_each_entry(c, &clk_root_list, child_node) clk_summary_show_subtree(s, c, 0); - hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(c, &clk_orphan_list, child_node) clk_summary_show_subtree(s, c, 0); mutex_unlock(&prepare_lock); @@ -111,14 +109,13 @@ static void clk_dump_one(struct seq_file *s, struct clk *c, int level) static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level) { struct clk *child; - struct hlist_node *tmp; if (!c) return; clk_dump_one(s, c, level); - hlist_for_each_entry(child, tmp, &c->children, child_node) { + hlist_for_each_entry(child, &c->children, child_node) { seq_printf(s, ","); clk_dump_subtree(s, child, level + 1); } @@ -129,21 +126,20 @@ static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level) static int clk_dump(struct seq_file *s, void *data) { struct clk *c; - struct hlist_node *tmp; bool first_node = true; seq_printf(s, "{"); mutex_lock(&prepare_lock); - hlist_for_each_entry(c, tmp, &clk_root_list, child_node) { + hlist_for_each_entry(c, &clk_root_list, child_node) { if (!first_node) seq_printf(s, ","); first_node = false; clk_dump_subtree(s, c, 0); } - hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) { + hlist_for_each_entry(c, &clk_orphan_list, child_node) { seq_printf(s, ","); clk_dump_subtree(s, c, 0); } @@ -222,7 +218,6 @@ out: static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) { struct clk *child; - struct hlist_node *tmp; int ret = -EINVAL;; if (!clk || !pdentry) @@ -233,7 +228,7 @@ static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) if (ret) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_debug_create_subtree(child, clk->dentry); ret = 0; @@ -299,7 +294,6 @@ out: static int __init clk_debug_init(void) { struct clk *clk; - struct hlist_node *tmp; struct dentry *d; rootdir = debugfs_create_dir("clk", NULL); @@ -324,10 +318,10 @@ static int __init clk_debug_init(void) mutex_lock(&prepare_lock); - hlist_for_each_entry(clk, tmp, &clk_root_list, child_node) + hlist_for_each_entry(clk, &clk_root_list, child_node) clk_debug_create_subtree(clk, rootdir); - hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(clk, &clk_orphan_list, child_node) clk_debug_create_subtree(clk, orphandir); inited = 1; @@ -345,13 +339,12 @@ static inline int clk_debug_register(struct clk *clk) { return 0; } static void clk_disable_unused_subtree(struct clk *clk) { struct clk *child; - struct hlist_node *tmp; unsigned long flags; if (!clk) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_disable_unused_subtree(child); spin_lock_irqsave(&enable_lock, flags); @@ -384,14 +377,13 @@ out: static int clk_disable_unused(void) { struct clk *clk; - struct hlist_node *tmp; mutex_lock(&prepare_lock); - hlist_for_each_entry(clk, tmp, &clk_root_list, child_node) + hlist_for_each_entry(clk, &clk_root_list, child_node) clk_disable_unused_subtree(clk); - hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node) + hlist_for_each_entry(clk, &clk_orphan_list, child_node) clk_disable_unused_subtree(clk); mutex_unlock(&prepare_lock); @@ -484,12 +476,11 @@ static struct clk *__clk_lookup_subtree(const char *name, struct clk *clk) { struct clk *child; struct clk *ret; - struct hlist_node *tmp; if (!strcmp(clk->name, name)) return clk; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { ret = __clk_lookup_subtree(name, child); if (ret) return ret; @@ -502,20 +493,19 @@ struct clk *__clk_lookup(const char *name) { struct clk *root_clk; struct clk *ret; - struct hlist_node *tmp; if (!name) return NULL; /* search the 'proper' clk tree first */ - hlist_for_each_entry(root_clk, tmp, &clk_root_list, child_node) { + hlist_for_each_entry(root_clk, &clk_root_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; } /* if not found, then search the orphan tree */ - hlist_for_each_entry(root_clk, tmp, &clk_orphan_list, child_node) { + hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; @@ -812,7 +802,6 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg) { unsigned long old_rate; unsigned long parent_rate = 0; - struct hlist_node *tmp; struct clk *child; old_rate = clk->rate; @@ -832,7 +821,7 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg) if (clk->notifier_count && msg) __clk_notify(clk, msg, old_rate, clk->rate); - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) __clk_recalc_rates(child, msg); } @@ -878,7 +867,6 @@ EXPORT_SYMBOL_GPL(clk_get_rate); */ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate) { - struct hlist_node *tmp; struct clk *child; unsigned long new_rate; int ret = NOTIFY_DONE; @@ -895,7 +883,7 @@ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate) if (ret == NOTIFY_BAD) goto out; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { ret = __clk_speculate_rates(child, new_rate); if (ret == NOTIFY_BAD) break; @@ -908,11 +896,10 @@ out: static void clk_calc_subtree(struct clk *clk, unsigned long new_rate) { struct clk *child; - struct hlist_node *tmp; clk->new_rate = new_rate; - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { if (child->ops->recalc_rate) child->new_rate = child->ops->recalc_rate(child->hw, new_rate); else @@ -983,7 +970,6 @@ out: */ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long event) { - struct hlist_node *tmp; struct clk *child, *fail_clk = NULL; int ret = NOTIFY_DONE; @@ -996,7 +982,7 @@ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long even fail_clk = clk; } - hlist_for_each_entry(child, tmp, &clk->children, child_node) { + hlist_for_each_entry(child, &clk->children, child_node) { clk = clk_propagate_rate_change(child, event); if (clk) fail_clk = clk; @@ -1014,7 +1000,6 @@ static void clk_change_rate(struct clk *clk) struct clk *child; unsigned long old_rate; unsigned long best_parent_rate = 0; - struct hlist_node *tmp; old_rate = clk->rate; @@ -1032,7 +1017,7 @@ static void clk_change_rate(struct clk *clk) if (clk->notifier_count && old_rate != clk->rate) __clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate); - hlist_for_each_entry(child, tmp, &clk->children, child_node) + hlist_for_each_entry(child, &clk->children, child_node) clk_change_rate(child); } @@ -1348,7 +1333,7 @@ int __clk_init(struct device *dev, struct clk *clk) { int i, ret = 0; struct clk *orphan; - struct hlist_node *tmp, *tmp2; + struct hlist_node *tmp2; if (!clk) return -EINVAL; @@ -1448,7 +1433,7 @@ int __clk_init(struct device *dev, struct clk *clk) * walk the list of orphan clocks and reparent any that are children of * this clock */ - hlist_for_each_entry_safe(orphan, tmp, tmp2, &clk_orphan_list, child_node) { + hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { if (orphan->ops->get_parent) { i = orphan->ops->get_parent(orphan->hw); if (!strcmp(clk->name, orphan->parent_names[i])) diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c index 591b6597c00a..126cf295b198 100644 --- a/drivers/dca/dca-sysfs.c +++ b/drivers/dca/dca-sysfs.c @@ -53,22 +53,19 @@ void dca_sysfs_remove_req(struct dca_provider *dca, int slot) int dca_sysfs_add_provider(struct dca_provider *dca, struct device *dev) { struct device *cd; - int err = 0; + int ret; -idr_try_again: - if (!idr_pre_get(&dca_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); spin_lock(&dca_idr_lock); - err = idr_get_new(&dca_idr, dca, &dca->id); + + ret = idr_alloc(&dca_idr, dca, 0, 0, GFP_NOWAIT); + if (ret >= 0) + dca->id = ret; + spin_unlock(&dca_idr_lock); - switch (err) { - case 0: - break; - case -EAGAIN: - goto idr_try_again; - default: - return err; - } + idr_preload_end(); + if (ret < 0) + return ret; cd = device_create(dca_class, dev, MKDEV(0, 0), NULL, "dca%d", dca->id); if (IS_ERR(cd)) { diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 242b8c0a3de8..b2728d6ba2fd 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -686,18 +686,14 @@ static int get_dma_id(struct dma_device *device) { int rc; - idr_retry: - if (!idr_pre_get(&dma_idr, GFP_KERNEL)) - return -ENOMEM; mutex_lock(&dma_list_mutex); - rc = idr_get_new(&dma_idr, NULL, &device->dev_id); - mutex_unlock(&dma_list_mutex); - if (rc == -EAGAIN) - goto idr_retry; - else if (rc != 0) - return rc; - return 0; + rc = idr_alloc(&dma_idr, NULL, 0, 0, GFP_KERNEL); + if (rc >= 0) + device->dev_id = rc; + + mutex_unlock(&dma_list_mutex); + return rc < 0 ? rc : 0; } /** diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index f8d22872d753..27ac423ab25e 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -487,27 +487,28 @@ static int ioctl_get_info(struct client *client, union ioctl_arg *arg) static int add_client_resource(struct client *client, struct client_resource *resource, gfp_t gfp_mask) { + bool preload = gfp_mask & __GFP_WAIT; unsigned long flags; int ret; - retry: - if (idr_pre_get(&client->resource_idr, gfp_mask) == 0) - return -ENOMEM; - + if (preload) + idr_preload(gfp_mask); spin_lock_irqsave(&client->lock, flags); + if (client->in_shutdown) ret = -ECANCELED; else - ret = idr_get_new(&client->resource_idr, resource, - &resource->handle); + ret = idr_alloc(&client->resource_idr, resource, 0, 0, + GFP_NOWAIT); if (ret >= 0) { + resource->handle = ret; client_get(client); schedule_if_iso_resource(resource); } - spin_unlock_irqrestore(&client->lock, flags); - if (ret == -EAGAIN) - goto retry; + spin_unlock_irqrestore(&client->lock, flags); + if (preload) + idr_preload_end(); return ret < 0 ? ret : 0; } @@ -1779,7 +1780,6 @@ static int fw_device_op_release(struct inode *inode, struct file *file) wait_event(client->tx_flush_wait, !has_outbound_transactions(client)); idr_for_each(&client->resource_idr, shutdown_resource, client); - idr_remove_all(&client->resource_idr); idr_destroy(&client->resource_idr); list_for_each_entry_safe(event, next_event, &client->event_list, link) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 3873d535b28d..03ce7d980c6a 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -1017,12 +1017,11 @@ static void fw_device_init(struct work_struct *work) fw_device_get(device); down_write(&fw_device_rwsem); - ret = idr_pre_get(&fw_device_idr, GFP_KERNEL) ? - idr_get_new(&fw_device_idr, device, &minor) : - -ENOMEM; + minor = idr_alloc(&fw_device_idr, device, 0, 1 << MINORBITS, + GFP_KERNEL); up_write(&fw_device_rwsem); - if (ret < 0) + if (minor < 0) goto error; device->device.bus = &fw_bus_type; diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index fed08b661711..7320bf891706 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -79,6 +79,7 @@ #include <linux/device.h> #include <linux/slab.h> #include <linux/pstore.h> +#include <linux/ctype.h> #include <linux/fs.h> #include <linux/ramfs.h> @@ -908,6 +909,48 @@ static struct inode *efivarfs_get_inode(struct super_block *sb, return inode; } +/* + * Return true if 'str' is a valid efivarfs filename of the form, + * + * VariableName-12345678-1234-1234-1234-1234567891bc + */ +static bool efivarfs_valid_name(const char *str, int len) +{ + static const char dashes[GUID_LEN] = { + [8] = 1, [13] = 1, [18] = 1, [23] = 1 + }; + const char *s = str + len - GUID_LEN; + int i; + + /* + * We need a GUID, plus at least one letter for the variable name, + * plus the '-' separator + */ + if (len < GUID_LEN + 2) + return false; + + /* GUID should be right after the first '-' */ + if (s - 1 != strchr(str, '-')) + return false; + + /* + * Validate that 's' is of the correct format, e.g. + * + * 12345678-1234-1234-1234-123456789abc + */ + for (i = 0; i < GUID_LEN; i++) { + if (dashes[i]) { + if (*s++ != '-') + return false; + } else { + if (!isxdigit(*s++)) + return false; + } + } + + return true; +} + static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid) { guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]); @@ -936,11 +979,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, struct efivar_entry *var; int namelen, i = 0, err = 0; - /* - * We need a GUID, plus at least one letter for the variable name, - * plus the '-' separator - */ - if (dentry->d_name.len < GUID_LEN + 2) + if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len)) return -EINVAL; inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0); @@ -1012,6 +1051,84 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) return -EINVAL; }; +/* + * Compare two efivarfs file names. + * + * An efivarfs filename is composed of two parts, + * + * 1. A case-sensitive variable name + * 2. A case-insensitive GUID + * + * So we need to perform a case-sensitive match on part 1 and a + * case-insensitive match on part 2. + */ +static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, + const struct qstr *name) +{ + int guid = len - GUID_LEN; + + if (name->len != len) + return 1; + + /* Case-sensitive compare for the variable name */ + if (memcmp(str, name->name, guid)) + return 1; + + /* Case-insensitive compare for the GUID */ + return strncasecmp(name->name + guid, str + guid, GUID_LEN); +} + +static int efivarfs_d_hash(const struct dentry *dentry, + const struct inode *inode, struct qstr *qstr) +{ + unsigned long hash = init_name_hash(); + const unsigned char *s = qstr->name; + unsigned int len = qstr->len; + + if (!efivarfs_valid_name(s, len)) + return -EINVAL; + + while (len-- > GUID_LEN) + hash = partial_name_hash(*s++, hash); + + /* GUID is case-insensitive. */ + while (len--) + hash = partial_name_hash(tolower(*s++), hash); + + qstr->hash = end_name_hash(hash); + return 0; +} + +/* + * Retaining negative dentries for an in-memory filesystem just wastes + * memory and lookup time: arrange for them to be deleted immediately. + */ +static int efivarfs_delete_dentry(const struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations efivarfs_d_ops = { + .d_compare = efivarfs_d_compare, + .d_hash = efivarfs_d_hash, + .d_delete = efivarfs_delete_dentry, +}; + +static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) +{ + struct qstr q; + + q.name = name; + q.len = strlen(name); + + if (efivarfs_d_hash(NULL, NULL, &q)) + return NULL; + + return d_alloc(parent, &q); +} + static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode = NULL; @@ -1027,6 +1144,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = EFIVARFS_MAGIC; sb->s_op = &efivarfs_ops; + sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0); @@ -1067,7 +1185,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) if (!inode) goto fail_name; - dentry = d_alloc_name(root, name); + dentry = efivarfs_alloc_dentry(root, name); if (!dentry) goto fail_inode; @@ -1084,7 +1202,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) mutex_lock(&inode->i_mutex); inode->i_private = entry; - i_size_write(inode, size+4); + i_size_write(inode, size + sizeof(entry->var.Attributes)); mutex_unlock(&inode->i_mutex); d_add(dentry, inode); } @@ -1117,8 +1235,20 @@ static struct file_system_type efivarfs_type = { .kill_sb = efivarfs_kill_sb, }; +/* + * Handle negative dentry. + */ +static struct dentry *efivarfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + static const struct inode_operations efivarfs_dir_inode_operations = { - .lookup = simple_lookup, + .lookup = efivarfs_lookup, .unlink = efivarfs_unlink, .create = efivarfs_create, }; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4828fe7c66cb..fff9786cdc64 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -411,15 +411,10 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev, goto err_out; } - do { - ret = -ENOMEM; - if (idr_pre_get(&dirent_idr, GFP_KERNEL)) - ret = idr_get_new_above(&dirent_idr, value_sd, - 1, &id); - } while (ret == -EAGAIN); - - if (ret) + ret = idr_alloc(&dirent_idr, value_sd, 1, 0, GFP_KERNEL); + if (ret < 0) goto free_sd; + id = ret; desc->flags &= GPIO_FLAGS_MASK; desc->flags |= (unsigned long)id << ID_SHIFT; diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 45adf97e678f..725968d38976 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -74,24 +74,13 @@ void drm_ctxbitmap_free(struct drm_device * dev, int ctx_handle) */ static int drm_ctxbitmap_next(struct drm_device * dev) { - int new_id; int ret; -again: - if (idr_pre_get(&dev->ctx_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } mutex_lock(&dev->struct_mutex); - ret = idr_get_new_above(&dev->ctx_idr, NULL, - DRM_RESERVED_CONTEXTS, &new_id); + ret = idr_alloc(&dev->ctx_idr, NULL, DRM_RESERVED_CONTEXTS, 0, + GFP_KERNEL); mutex_unlock(&dev->struct_mutex); - if (ret == -EAGAIN) - goto again; - else if (ret) - return ret; - - return new_id; + return ret; } /** @@ -118,7 +107,7 @@ int drm_ctxbitmap_init(struct drm_device * dev) void drm_ctxbitmap_cleanup(struct drm_device * dev) { mutex_lock(&dev->struct_mutex); - idr_remove_all(&dev->ctx_idr); + idr_destroy(&dev->ctx_idr); mutex_unlock(&dev->struct_mutex); } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3bdf2a650d9c..792c3e3795ca 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -266,32 +266,21 @@ char *drm_get_connector_status_name(enum drm_connector_status status) static int drm_mode_object_get(struct drm_device *dev, struct drm_mode_object *obj, uint32_t obj_type) { - int new_id = 0; int ret; -again: - if (idr_pre_get(&dev->mode_config.crtc_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Ran out memory getting a mode number\n"); - return -ENOMEM; - } - mutex_lock(&dev->mode_config.idr_mutex); - ret = idr_get_new_above(&dev->mode_config.crtc_idr, obj, 1, &new_id); - - if (!ret) { + ret = idr_alloc(&dev->mode_config.crtc_idr, obj, 1, 0, GFP_KERNEL); + if (ret >= 0) { /* * Set up the object linking under the protection of the idr * lock so that other users can't see inconsistent state. */ - obj->id = new_id; + obj->id = ret; obj->type = obj_type; } mutex_unlock(&dev->mode_config.idr_mutex); - if (ret == -EAGAIN) - goto again; - - return ret; + return ret < 0 ? ret : 0; } /** @@ -1272,7 +1261,6 @@ void drm_mode_config_cleanup(struct drm_device *dev) crtc->funcs->destroy(crtc); } - idr_remove_all(&dev->mode_config.crtc_idr); idr_destroy(&dev->mode_config.crtc_idr); } EXPORT_SYMBOL(drm_mode_config_cleanup); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index be174cab105a..25f91cd23e60 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -297,7 +297,6 @@ static void __exit drm_core_exit(void) unregister_chrdev(DRM_MAJOR, "drm"); - idr_remove_all(&drm_minors_idr); idr_destroy(&drm_minors_idr); } diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 24efae464e2c..af779ae19ebf 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -270,21 +270,19 @@ drm_gem_handle_create(struct drm_file *file_priv, int ret; /* - * Get the user-visible handle using idr. + * Get the user-visible handle using idr. Preload and perform + * allocation under our spinlock. */ -again: - /* ensure there is space available to allocate a handle */ - if (idr_pre_get(&file_priv->object_idr, GFP_KERNEL) == 0) - return -ENOMEM; - - /* do the allocation under our spinlock */ + idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_get_new_above(&file_priv->object_idr, obj, 1, (int *)handlep); + + ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + spin_unlock(&file_priv->table_lock); - if (ret == -EAGAIN) - goto again; - else if (ret) + idr_preload_end(); + if (ret < 0) return ret; + *handlep = ret; drm_gem_object_handle_reference(obj); @@ -451,29 +449,25 @@ drm_gem_flink_ioctl(struct drm_device *dev, void *data, if (obj == NULL) return -ENOENT; -again: - if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0) { - ret = -ENOMEM; - goto err; - } - + idr_preload(GFP_KERNEL); spin_lock(&dev->object_name_lock); if (!obj->name) { - ret = idr_get_new_above(&dev->object_name_idr, obj, 1, - &obj->name); + ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_NOWAIT); + obj->name = ret; args->name = (uint64_t) obj->name; spin_unlock(&dev->object_name_lock); + idr_preload_end(); - if (ret == -EAGAIN) - goto again; - else if (ret) + if (ret < 0) goto err; + ret = 0; /* Allocate a reference for the name table. */ drm_gem_object_reference(obj); } else { args->name = (uint64_t) obj->name; spin_unlock(&dev->object_name_lock); + idr_preload_end(); ret = 0; } @@ -561,8 +555,6 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private) { idr_for_each(&file_private->object_idr, &drm_gem_object_release_handle, file_private); - - idr_remove_all(&file_private->object_idr); idr_destroy(&file_private->object_idr); } diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index 80254547a3f8..7e4bae760e27 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -60,14 +60,13 @@ void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key) { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; int count = 0; hashed_key = hash_long(key, ht->order); DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, list, h_list, head) + hlist_for_each_entry(entry, h_list, head) DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); } @@ -76,14 +75,13 @@ static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht, { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, list, h_list, head) { + hlist_for_each_entry(entry, h_list, head) { if (entry->key == key) - return list; + return &entry->head; if (entry->key > key) break; } @@ -95,14 +93,13 @@ static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht, { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list; unsigned int hashed_key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, list, h_list, head) { + hlist_for_each_entry_rcu(entry, h_list, head) { if (entry->key == key) - return list; + return &entry->head; if (entry->key > key) break; } @@ -113,19 +110,19 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) { struct drm_hash_item *entry; struct hlist_head *h_list; - struct hlist_node *list, *parent; + struct hlist_node *parent; unsigned int hashed_key; unsigned long key = item->key; hashed_key = hash_long(key, ht->order); h_list = &ht->table[hashed_key]; parent = NULL; - hlist_for_each_entry(entry, list, h_list, head) { + hlist_for_each_entry(entry, h_list, head) { if (entry->key == key) return -EINVAL; if (entry->key > key) break; - parent = list; + parent = &entry->head; } if (parent) { hlist_add_after_rcu(parent, &item->head); diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 200e104f1fa0..7d30802a018f 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -109,7 +109,6 @@ EXPORT_SYMBOL(drm_ut_debug_printk); static int drm_minor_get_id(struct drm_device *dev, int type) { - int new_id; int ret; int base = 0, limit = 63; @@ -121,25 +120,11 @@ static int drm_minor_get_id(struct drm_device *dev, int type) limit = base + 255; } -again: - if (idr_pre_get(&drm_minors_idr, GFP_KERNEL) == 0) { - DRM_ERROR("Out of memory expanding drawable idr\n"); - return -ENOMEM; - } mutex_lock(&dev->struct_mutex); - ret = idr_get_new_above(&drm_minors_idr, NULL, - base, &new_id); + ret = idr_alloc(&drm_minors_idr, NULL, base, limit, GFP_KERNEL); mutex_unlock(&dev->struct_mutex); - if (ret == -EAGAIN) - goto again; - else if (ret) - return ret; - if (new_id >= limit) { - idr_remove(&drm_minors_idr, new_id); - return -EINVAL; - } - return new_id; + return ret == -ENOSPC ? -EINVAL : ret; } struct drm_master *drm_master_create(struct drm_minor *minor) diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 1a556354e92f..1adce07ecb5b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -137,21 +137,15 @@ static int ipp_create_id(struct idr *id_idr, struct mutex *lock, void *obj, DRM_DEBUG_KMS("%s\n", __func__); -again: - /* ensure there is space available to allocate a handle */ - if (idr_pre_get(id_idr, GFP_KERNEL) == 0) { - DRM_ERROR("failed to get idr.\n"); - return -ENOMEM; - } - /* do the allocation under our mutexlock */ mutex_lock(lock); - ret = idr_get_new_above(id_idr, obj, 1, (int *)idp); + ret = idr_alloc(id_idr, obj, 1, 0, GFP_KERNEL); mutex_unlock(lock); - if (ret == -EAGAIN) - goto again; + if (ret < 0) + return ret; - return ret; + *idp = ret; + return 0; } static void *ipp_find_obj(struct idr *id_idr, struct mutex *lock, u32 id) @@ -1786,8 +1780,6 @@ err_iommu: drm_iommu_detach_device(drm_dev, ippdrv->dev); err_idr: - idr_remove_all(&ctx->ipp_idr); - idr_remove_all(&ctx->prop_idr); idr_destroy(&ctx->ipp_idr); idr_destroy(&ctx->prop_idr); return ret; @@ -1965,8 +1957,6 @@ static int ipp_remove(struct platform_device *pdev) exynos_drm_subdrv_unregister(&ctx->subdrv); /* remove,destroy ipp idr */ - idr_remove_all(&ctx->ipp_idr); - idr_remove_all(&ctx->prop_idr); idr_destroy(&ctx->ipp_idr); idr_destroy(&ctx->prop_idr); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 21177d9df423..94d873a6cffb 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -139,7 +139,7 @@ create_hw_context(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; - int ret, id; + int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx == NULL) @@ -164,22 +164,11 @@ create_hw_context(struct drm_device *dev, ctx->file_priv = file_priv; -again: - if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { - ret = -ENOMEM; - DRM_DEBUG_DRIVER("idr allocation failed\n"); - goto err_out; - } - - ret = idr_get_new_above(&file_priv->context_idr, ctx, - DEFAULT_CONTEXT_ID + 1, &id); - if (ret == 0) - ctx->id = id; - - if (ret == -EAGAIN) - goto again; - else if (ret) + ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID + 1, 0, + GFP_KERNEL); + if (ret < 0) goto err_out; + ctx->id = ret; return ctx; diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c index 841065b998a1..5a5325e6b759 100644 --- a/drivers/gpu/drm/sis/sis_drv.c +++ b/drivers/gpu/drm/sis/sis_drv.c @@ -58,7 +58,6 @@ static int sis_driver_unload(struct drm_device *dev) { drm_sis_private_t *dev_priv = dev->dev_private; - idr_remove_all(&dev_priv->object_idr); idr_destroy(&dev_priv->object_idr); kfree(dev_priv); diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c index 2b2f78c428af..9a43d98e5003 100644 --- a/drivers/gpu/drm/sis/sis_mm.c +++ b/drivers/gpu/drm/sis/sis_mm.c @@ -128,17 +128,10 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file, if (retval) goto fail_alloc; -again: - if (idr_pre_get(&dev_priv->object_idr, GFP_KERNEL) == 0) { - retval = -ENOMEM; - goto fail_idr; - } - - retval = idr_get_new_above(&dev_priv->object_idr, item, 1, &user_key); - if (retval == -EAGAIN) - goto again; - if (retval) + retval = idr_alloc(&dev_priv->object_idr, item, 1, 0, GFP_KERNEL); + if (retval < 0) goto fail_idr; + user_key = retval; list_add(&item->owner_list, &file_priv->obj_list); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c index c0f1cc7f5ca9..d0ab3fb32acd 100644 --- a/drivers/gpu/drm/via/via_map.c +++ b/drivers/gpu/drm/via/via_map.c @@ -120,7 +120,6 @@ int via_driver_unload(struct drm_device *dev) { drm_via_private_t *dev_priv = dev->dev_private; - idr_remove_all(&dev_priv->object_idr); idr_destroy(&dev_priv->object_idr); kfree(dev_priv); diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c index 0d55432e02a2..0ab93ff09873 100644 --- a/drivers/gpu/drm/via/via_mm.c +++ b/drivers/gpu/drm/via/via_mm.c @@ -148,17 +148,10 @@ int via_mem_alloc(struct drm_device *dev, void *data, if (retval) goto fail_alloc; -again: - if (idr_pre_get(&dev_priv->object_idr, GFP_KERNEL) == 0) { - retval = -ENOMEM; - goto fail_idr; - } - - retval = idr_get_new_above(&dev_priv->object_idr, item, 1, &user_key); - if (retval == -EAGAIN) - goto again; - if (retval) + retval = idr_alloc(&dev_priv->object_idr, item, 1, 0, GFP_KERNEL); + if (retval < 0) goto fail_idr; + user_key = retval; list_add(&item->owner_list, &file_priv->obj_list); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 16556170fb32..bc784254e78e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -177,17 +177,16 @@ int vmw_resource_alloc_id(struct vmw_resource *res) BUG_ON(res->id != -1); - do { - if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) - return -ENOMEM; - - write_lock(&dev_priv->resource_lock); - ret = idr_get_new_above(idr, res, 1, &res->id); - write_unlock(&dev_priv->resource_lock); + idr_preload(GFP_KERNEL); + write_lock(&dev_priv->resource_lock); - } while (ret == -EAGAIN); + ret = idr_alloc(idr, res, 1, 0, GFP_NOWAIT); + if (ret >= 0) + res->id = ret; - return ret; + write_unlock(&dev_priv->resource_lock); + idr_preload_end(); + return ret < 0 ? ret : 0; } /** diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 66a30f7ac882..991d38daa87d 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -935,25 +935,17 @@ out_list: */ int i2c_add_adapter(struct i2c_adapter *adapter) { - int id, res = 0; - -retry: - if (idr_pre_get(&i2c_adapter_idr, GFP_KERNEL) == 0) - return -ENOMEM; + int id; mutex_lock(&core_lock); - /* "above" here means "above or equal to", sigh */ - res = idr_get_new_above(&i2c_adapter_idr, adapter, - __i2c_first_dynamic_bus_num, &id); + id = idr_alloc(&i2c_adapter_idr, adapter, + __i2c_first_dynamic_bus_num, 0, GFP_KERNEL); mutex_unlock(&core_lock); - - if (res < 0) { - if (res == -EAGAIN) - goto retry; - return res; - } + if (id < 0) + return id; adapter->nr = id; + return i2c_register_adapter(adapter); } EXPORT_SYMBOL(i2c_add_adapter); @@ -984,33 +976,17 @@ EXPORT_SYMBOL(i2c_add_adapter); int i2c_add_numbered_adapter(struct i2c_adapter *adap) { int id; - int status; if (adap->nr == -1) /* -1 means dynamically assign bus id */ return i2c_add_adapter(adap); - if (adap->nr & ~MAX_IDR_MASK) - return -EINVAL; - -retry: - if (idr_pre_get(&i2c_adapter_idr, GFP_KERNEL) == 0) - return -ENOMEM; mutex_lock(&core_lock); - /* "above" here means "above or equal to", sigh; - * we need the "equal to" result to force the result - */ - status = idr_get_new_above(&i2c_adapter_idr, adap, adap->nr, &id); - if (status == 0 && id != adap->nr) { - status = -EBUSY; - idr_remove(&i2c_adapter_idr, id); - } + id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, + GFP_KERNEL); mutex_unlock(&core_lock); - if (status == -EAGAIN) - goto retry; - - if (status == 0) - status = i2c_register_adapter(adap); - return status; + if (id < 0) + return id == -ENOSPC ? -EBUSY : id; + return i2c_register_adapter(adap); } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 394fea2ba1bc..784b97cb05b0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -382,20 +382,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret, id; + int id; static int next_id; - do { - spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, - next_id, &id); - if (!ret) - next_id = ((unsigned) id + 1) & MAX_IDR_MASK; - spin_unlock_irqrestore(&cm.lock, flags); - } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&cm.lock, flags); + + id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT); + if (id >= 0) + next_id = max(id + 1, 0); + + spin_unlock_irqrestore(&cm.lock, flags); + idr_preload_end(); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return ret; + return id < 0 ? id : 0; } static void cm_free_id(__be32 local_id) @@ -3844,7 +3845,6 @@ static int __init ib_cm_init(void) cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); - idr_pre_get(&cm.local_id_table, GFP_KERNEL); INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d789eea32168..71c2c7116802 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2143,33 +2143,23 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; - int port, ret; + int ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; - do { - ret = idr_get_new_above(ps, bind_list, snum, &port); - } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); - - if (ret) - goto err1; - - if (port != snum) { - ret = -EADDRNOTAVAIL; - goto err2; - } + ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL); + if (ret < 0) + goto err; bind_list->ps = ps; - bind_list->port = (unsigned short) port; + bind_list->port = (unsigned short)ret; cma_bind_port(bind_list, id_priv); return 0; -err2: - idr_remove(ps, port); -err1: +err: kfree(bind_list); - return ret; + return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; } static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) @@ -2214,10 +2204,9 @@ static int cma_check_port(struct rdma_bind_list *bind_list, { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; - struct hlist_node *node; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 176c8f90f2bb..9f5ad7cc33c8 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -118,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, { struct hlist_head *bucket; struct ib_pool_fmr *fmr; - struct hlist_node *pos; if (!pool->cache_bucket) return NULL; bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - hlist_for_each_entry(fmr, pos, bucket, cache_node) + hlist_for_each_entry(fmr, bucket, cache_node) if (io_virtual_address == fmr->io_virtual_address && page_list_len == fmr->page_list_len && !memcmp(page_list, fmr->page_list, diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a8905abc56e4..934f45e79e5e 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -611,19 +611,21 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { + bool preload = gfp_mask & __GFP_WAIT; unsigned long flags; int ret, id; -retry: - if (!idr_pre_get(&query_idr, gfp_mask)) - return -ENOMEM; + if (preload) + idr_preload(gfp_mask); spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &id); + + id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); + spin_unlock_irqrestore(&idr_lock, flags); - if (ret == -EAGAIN) - goto retry; - if (ret) - return ret; + if (preload) + idr_preload_end(); + if (id < 0) + return id; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 49b15ac1987e..f2f63933e8a9 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -176,7 +176,6 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; - int result; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) @@ -187,17 +186,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) ctx->file = file; INIT_LIST_HEAD(&ctx->events); - do { - result = idr_pre_get(&ctx_id_table, GFP_KERNEL); - if (!result) - goto error; - - mutex_lock(&ctx_id_mutex); - result = idr_get_new(&ctx_id_table, ctx, &ctx->id); - mutex_unlock(&ctx_id_mutex); - } while (result == -EAGAIN); - - if (result) + mutex_lock(&ctx_id_mutex); + ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&ctx_id_mutex); + if (ctx->id < 0) goto error; list_add_tail(&ctx->file_list, &file->ctxs); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2709ff581392..5ca44cd9b00c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -145,7 +145,6 @@ static void ucma_put_ctx(struct ucma_context *ctx) static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -156,17 +155,10 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; - do { - ret = idr_pre_get(&ctx_idr, GFP_KERNEL); - if (!ret) - goto error; - - mutex_lock(&mut); - ret = idr_get_new(&ctx_idr, ctx, &ctx->id); - mutex_unlock(&mut); - } while (ret == -EAGAIN); - - if (ret) + mutex_lock(&mut); + ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (ctx->id < 0) goto error; list_add_tail(&ctx->list, &file->ctx_list); @@ -180,23 +172,15 @@ error: static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc; - int ret; mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; - do { - ret = idr_pre_get(&multicast_idr, GFP_KERNEL); - if (!ret) - goto error; - - mutex_lock(&mut); - ret = idr_get_new(&multicast_idr, mc, &mc->id); - mutex_unlock(&mut); - } while (ret == -EAGAIN); - - if (ret) + mutex_lock(&mut); + mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (mc->id < 0) goto error; mc->ctx = ctx; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e71d834c922a..a7d00f6b3bc1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -125,18 +125,17 @@ static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) { int ret; -retry: - if (!idr_pre_get(idr, GFP_KERNEL)) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&ib_uverbs_idr_lock); - ret = idr_get_new(idr, uobj, &uobj->id); - spin_unlock(&ib_uverbs_idr_lock); - if (ret == -EAGAIN) - goto retry; + ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; - return ret; + spin_unlock(&ib_uverbs_idr_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; } void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index 28cd5cb51859..0ab826b280b2 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -382,14 +382,17 @@ static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp) { int ret; - do { - spin_lock_irq(&c2dev->qp_table.lock); - ret = idr_get_new_above(&c2dev->qp_table.idr, qp, - c2dev->qp_table.last++, &qp->qpn); - spin_unlock_irq(&c2dev->qp_table.lock); - } while ((ret == -EAGAIN) && - idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL)); - return ret; + idr_preload(GFP_KERNEL); + spin_lock_irq(&c2dev->qp_table.lock); + + ret = idr_alloc(&c2dev->qp_table.idr, qp, c2dev->qp_table.last++, 0, + GFP_NOWAIT); + if (ret >= 0) + qp->qpn = ret; + + spin_unlock_irq(&c2dev->qp_table.lock); + idr_preload_end(); + return ret < 0 ? ret : 0; } static void c2_free_qpn(struct c2_dev *c2dev, int qpn) diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index a1c44578e039..837862287a29 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -153,19 +153,17 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, void *handle, u32 id) { int ret; - int newid; - - do { - if (!idr_pre_get(idr, GFP_KERNEL)) { - return -ENOMEM; - } - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(newid != id); - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); - - return ret; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + + ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT); + + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4c07fc069766..7eec5e13fa8c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -260,20 +260,21 @@ static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id, int lock) { int ret; - int newid; - do { - if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) - return -ENOMEM; - if (lock) - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(!ret && newid != id); - if (lock) - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); - - return ret; + if (lock) { + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + } + + ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC); + + if (lock) { + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + } + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 8f5290147e8a..212150c25ea0 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -128,7 +128,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, void *vpage; u32 counter; u64 rpage, cqx_fec, h_ret; - int ipz_rc, ret, i; + int ipz_rc, i; unsigned long flags; if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) @@ -163,32 +163,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, adapter_handle = shca->ipz_hca_handle; param.eq_handle = shca->eq.ipz_eq_handle; - do { - if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't reserve idr nr. device=%p", - device); - goto create_cq_exit1; - } - - write_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - } while (ret == -EAGAIN); + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); - if (ret) { + if (my_cq->token < 0) { cq = ERR_PTR(-ENOMEM); ehca_err(device, "Can't allocate new idr entry. device=%p", device); goto create_cq_exit1; } - if (my_cq->token > 0x1FFFFFF) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Invalid number of cq. device=%p", device); - goto create_cq_exit2; - } - /* * CQs maximum depth is 4GB-64, but we need additional 20 as buffer * for receiving errors CQEs. diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 149393915ae5..00d6861a6a18 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -636,30 +636,26 @@ static struct ehca_qp *internal_create_qp( my_qp->send_cq = container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - do { - if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't reserve idr resources."); - goto create_qp_exit0; - } + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - } while (ret == -EAGAIN); + ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; - if (ret) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { + if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } goto create_qp_exit0; } - if (my_qp->token > 0x1FFFFFF) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - goto create_qp_exit1; - } - if (has_srq) parms.srq_token = my_qp->token; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 7b371f545ece..bd0caedafe99 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -194,11 +194,6 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) struct ipath_devdata *dd; int ret; - if (!idr_pre_get(&unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = vzalloc(sizeof(*dd)); if (!dd) { dd = ERR_PTR(-ENOMEM); @@ -206,9 +201,10 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) } dd->ipath_unit = -1; + idr_preload(GFP_KERNEL); spin_lock_irqsave(&ipath_devs_lock, flags); - ret = idr_get_new(&unit_table, dd, &dd->ipath_unit); + ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT); if (ret < 0) { printk(KERN_ERR IPATH_DRV_NAME ": Could not allocate unit ID: error %d\n", -ret); @@ -216,6 +212,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd = ERR_PTR(ret); goto bail_unlock; } + dd->ipath_unit = ret; dd->pcidev = pdev; pci_set_drvdata(pdev, dd); @@ -224,7 +221,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) bail_unlock: spin_unlock_irqrestore(&ipath_devs_lock, flags); - + idr_preload_end(); bail: return dd; } @@ -2503,11 +2500,6 @@ static int __init infinipath_init(void) * the PCI subsystem. */ idr_init(&unit_table); - if (!idr_pre_get(&unit_table, GFP_KERNEL)) { - printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail; - } ret = pci_register_driver(&ipath_driver); if (ret < 0) { diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index dbc99d41605c..e0d79b2395e4 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -203,7 +203,7 @@ static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) static struct id_map_entry * id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) { - int ret, id; + int ret; static int next_id; struct id_map_entry *ent; struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; @@ -220,25 +220,23 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) ent->dev = to_mdev(ibdev); INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); - do { - spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - ret = idr_get_new_above(&sriov->pv_id_table, ent, - next_id, &id); - if (!ret) { - next_id = ((unsigned) id + 1) & MAX_IDR_MASK; - ent->pv_cm_id = (u32)id; - sl_id_map_add(ibdev, ent); - } + idr_preload(GFP_KERNEL); + spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); - spin_unlock(&sriov->id_map_lock); - } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL)); - /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/ - if (!ret) { - spin_lock(&sriov->id_map_lock); + ret = idr_alloc(&sriov->pv_id_table, ent, next_id, 0, GFP_NOWAIT); + if (ret >= 0) { + next_id = max(ret + 1, 0); + ent->pv_cm_id = (u32)ret; + sl_id_map_add(ibdev, ent); list_add_tail(&ent->list, &sriov->cm_list); - spin_unlock(&sriov->id_map_lock); - return ent; } + + spin_unlock(&sriov->id_map_lock); + idr_preload_end(); + + if (ret >= 0) + return ent; + /*error flow*/ kfree(ent); mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index c4e0131f1b57..48928c8e7774 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -51,18 +51,6 @@ static DEFINE_IDR(ocrdma_dev_id); static union ib_gid ocrdma_zero_sgid; -static int ocrdma_get_instance(void) -{ - int instance = 0; - - /* Assign an unused number */ - if (!idr_pre_get(&ocrdma_dev_id, GFP_KERNEL)) - return -1; - if (idr_get_new(&ocrdma_dev_id, NULL, &instance)) - return -1; - return instance; -} - void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) { u8 mac_addr[6]; @@ -416,7 +404,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) goto idr_err; memcpy(&dev->nic_info, dev_info, sizeof(*dev_info)); - dev->id = ocrdma_get_instance(); + dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL); if (dev->id < 0) goto idr_err; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index ddf066d9abb6..50e33aa0b4e3 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1060,22 +1060,23 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) struct qib_devdata *dd; int ret; - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); if (!dd) { dd = ERR_PTR(-ENOMEM); goto bail; } + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); - ret = idr_get_new(&qib_unit_table, dd, &dd->unit); - if (ret >= 0) + + ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + dd->unit = ret; list_add(&dd->list, &qib_dev_list); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + idr_preload_end(); if (ret < 0) { qib_early_err(&pdev->dev, @@ -1180,11 +1181,6 @@ static int __init qlogic_ib_init(void) * the PCI subsystem. */ idr_init(&qib_unit_table); - if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { - pr_err("idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail_cq_wq; - } ret = pci_register_driver(&qib_driver); if (ret < 0) { @@ -1199,7 +1195,6 @@ static int __init qlogic_ib_init(void) bail_unit: idr_destroy(&qib_unit_table); -bail_cq_wq: destroy_workqueue(qib_cq_wq); bail_dev: qib_dev_cleanup(); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index abe2d699b6f3..8b07f83d48ad 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -483,7 +483,6 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr; struct sock *sk = sock->sk; - struct hlist_node *node; struct sock *csk; int err = 0; @@ -508,7 +507,7 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (sk->sk_protocol < ISDN_P_B_START) { read_lock_bh(&data_sockets.lock); - sk_for_each(csk, node, &data_sockets.head) { + sk_for_each(csk, &data_sockets.head) { if (sk == csk) continue; if (_pms(csk)->dev != _pms(sk)->dev) diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index deda591f70b9..9cb4b621fbc3 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -64,12 +64,11 @@ unlock: static void send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) { - struct hlist_node *node; struct sock *sk; struct sk_buff *cskb = NULL; read_lock(&sl->lock); - sk_for_each(sk, node, &sl->head) { + sk_for_each(sk, &sl->head) { if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index aefb78e3cbf9..d9d3f1c7b662 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -106,9 +106,8 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, struct dm_cell_key *key) { struct dm_bio_prison_cell *cell; - struct hlist_node *tmp; - hlist_for_each_entry(cell, tmp, bucket, list) + hlist_for_each_entry(cell, bucket, list) if (keys_equal(&cell->key, key)) return cell; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 651ca79881dd..93205e32a004 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -859,9 +859,8 @@ static void __check_watermark(struct dm_bufio_client *c) static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) { struct dm_buffer *b; - struct hlist_node *hn; - hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], + hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) { dm_bufio_cond_resched(); if (b->block == block) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 59fc18ae52c2..10079e07edf4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -227,12 +227,11 @@ static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) { struct dm_snap_tracked_chunk *c; - struct hlist_node *hn; int found = 0; spin_lock_irq(&s->tracked_chunk_lock); - hlist_for_each_entry(c, hn, + hlist_for_each_entry(c, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { if (c->chunk == chunk) { found = 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 314a0e2faf79..bb2cd3ce9b0f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -318,7 +318,6 @@ static void __exit dm_exit(void) /* * Should be empty by this point. */ - idr_remove_all(&_minor_idr); idr_destroy(&_minor_idr); } @@ -627,7 +626,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } @@ -1756,62 +1754,38 @@ static void free_minor(int minor) */ static int specific_minor(int minor) { - int r, m; + int r; if (minor >= (1 << MINORBITS)) return -EINVAL; - r = idr_pre_get(&_minor_idr, GFP_KERNEL); - if (!r) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&_minor_lock); - if (idr_find(&_minor_idr, minor)) { - r = -EBUSY; - goto out; - } - - r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); - if (r) - goto out; + r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT); - if (m != minor) { - idr_remove(&_minor_idr, m); - r = -EBUSY; - goto out; - } - -out: spin_unlock(&_minor_lock); - return r; + idr_preload_end(); + if (r < 0) + return r == -ENOSPC ? -EBUSY : r; + return 0; } static int next_free_minor(int *minor) { - int r, m; - - r = idr_pre_get(&_minor_idr, GFP_KERNEL); - if (!r) - return -ENOMEM; + int r; + idr_preload(GFP_KERNEL); spin_lock(&_minor_lock); - r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); - if (r) - goto out; - - if (m >= (1 << MINORBITS)) { - idr_remove(&_minor_idr, m); - r = -ENOSPC; - goto out; - } - - *minor = m; + r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT); -out: spin_unlock(&_minor_lock); - return r; + idr_preload_end(); + if (r < 0) + return r; + *minor = r; + return 0; } static const struct block_device_operations dm_blk_dops; diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 7b17a1fdeaf9..81da1a26042e 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -46,10 +46,9 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) int r = 0; unsigned bucket = dm_hash_block(b, DM_HASH_MASK); struct shadow_info *si; - struct hlist_node *n; spin_lock(&tm->lock); - hlist_for_each_entry(si, n, tm->buckets + bucket, hlist) + hlist_for_each_entry(si, tm->buckets + bucket, hlist) if (si->where == b) { r = 1; break; @@ -81,14 +80,14 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) static void wipe_shadow_table(struct dm_transaction_manager *tm) { struct shadow_info *si; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; int i; spin_lock(&tm->lock); for (i = 0; i < DM_HASH_SIZE; i++) { bucket = tm->buckets + i; - hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) + hlist_for_each_entry_safe(si, tmp, bucket, hlist) kfree(si); INIT_HLIST_HEAD(bucket); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 19d77a026639..5af2d2709081 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -365,10 +363,9 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, short generation) { struct stripe_head *sh; - struct hlist_node *hn; pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); - hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) + hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) if (sh->sector == sector && sh->generation == generation) return sh; pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); @@ -3917,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -4377,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); } } @@ -4755,11 +4748,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) { - trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), - raid_bio, 0); + if (remaining == 0) bio_endio(raid_bio, 0); - } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 56ff19cdc2ad..ffcb10ac4341 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -512,18 +512,17 @@ int memstick_add_host(struct memstick_host *host) { int rc; - while (1) { - if (!idr_pre_get(&memstick_host_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); + spin_lock(&memstick_host_lock); - spin_lock(&memstick_host_lock); - rc = idr_get_new(&memstick_host_idr, host, &host->id); - spin_unlock(&memstick_host_lock); - if (!rc) - break; - else if (rc != -EAGAIN) - return rc; - } + rc = idr_alloc(&memstick_host_idr, host, 0, 0, GFP_NOWAIT); + if (rc >= 0) + host->id = rc; + + spin_unlock(&memstick_host_lock); + idr_preload_end(); + if (rc < 0) + return rc; dev_set_name(&host->dev, "memstick%u", host->id); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 9729b92fbfdd..f12b78dbce04 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1213,21 +1213,10 @@ static int mspro_block_init_disk(struct memstick_dev *card) msb->page_size = be16_to_cpu(sys_info->unit_size); mutex_lock(&mspro_block_disk_lock); - if (!idr_pre_get(&mspro_block_disk_idr, GFP_KERNEL)) { - mutex_unlock(&mspro_block_disk_lock); - return -ENOMEM; - } - - rc = idr_get_new(&mspro_block_disk_idr, card, &disk_id); + disk_id = idr_alloc(&mspro_block_disk_idr, card, 0, 256, GFP_KERNEL); mutex_unlock(&mspro_block_disk_lock); - - if (rc) - return rc; - - if ((disk_id << MSPRO_BLOCK_PART_SHIFT) > 255) { - rc = -ENOSPC; - goto out_release_id; - } + if (disk_id < 0) + return disk_id; msb->disk = alloc_disk(1 << MSPRO_BLOCK_PART_SHIFT); if (!msb->disk) { diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 29b2172ae18f..a7c5b31c0d50 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -454,7 +454,7 @@ static int r592_transfer_fifo_pio(struct r592_device *dev) /* Executes one TPC (data is read/written from small or large fifo) */ static void r592_execute_tpc(struct r592_device *dev) { - bool is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS; + bool is_write; int len, error; u32 status, reg; @@ -463,6 +463,7 @@ static void r592_execute_tpc(struct r592_device *dev) return; } + is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS; len = dev->req->long_data ? dev->req->sg.length : dev->req->data_len; diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 481a98a10ecd..2f12cc13489a 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -1091,15 +1091,14 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, } handle->pcr = pcr; - if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) { - ret = -ENOMEM; - goto free_handle; - } - + idr_preload(GFP_KERNEL); spin_lock(&rtsx_pci_lock); - ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id); + ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT); + if (ret >= 0) + pcr->id = ret; spin_unlock(&rtsx_pci_lock); - if (ret) + idr_preload_end(); + if (ret < 0) goto free_handle; pcr->pci = pcidev; diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c index f428d86bfc10..f32550a74bdd 100644 --- a/drivers/misc/c2port/core.c +++ b/drivers/misc/c2port/core.c @@ -885,7 +885,7 @@ struct c2port_device *c2port_device_register(char *name, struct c2port_ops *ops, void *devdata) { struct c2port_device *c2dev; - int id, ret; + int ret; if (unlikely(!ops) || unlikely(!ops->access) || \ unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ @@ -897,22 +897,18 @@ struct c2port_device *c2port_device_register(char *name, if (unlikely(!c2dev)) return ERR_PTR(-ENOMEM); - ret = idr_pre_get(&c2port_idr, GFP_KERNEL); - if (!ret) { - ret = -ENOMEM; - goto error_idr_get_new; - } - + idr_preload(GFP_KERNEL); spin_lock_irq(&c2port_idr_lock); - ret = idr_get_new(&c2port_idr, c2dev, &id); + ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT); spin_unlock_irq(&c2port_idr_lock); + idr_preload_end(); if (ret < 0) - goto error_idr_get_new; - c2dev->id = id; + goto error_idr_alloc; + c2dev->id = ret; c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, - "c2port%d", id); + "c2port%d", c2dev->id); if (unlikely(IS_ERR(c2dev->dev))) { ret = PTR_ERR(c2dev->dev); goto error_device_create; @@ -946,10 +942,10 @@ error_device_create_bin_file: error_device_create: spin_lock_irq(&c2port_idr_lock); - idr_remove(&c2port_idr, id); + idr_remove(&c2port_idr, c2dev->id); spin_unlock_irq(&c2port_idr_lock); -error_idr_get_new: +error_idr_alloc: kfree(c2dev); return ERR_PTR(ret); diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 240a6d361665..2129274ef7ab 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, const struct mmu_notifier_ops *ops) { struct mmu_notifier *mn, *gru_mn = NULL; - struct hlist_node *n; if (mm->mmu_notifier_mm) { rcu_read_lock(); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) if (mn->ops == ops) { gru_mn = mn; diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index 0bd5349b0422..0ab7c922212c 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -196,13 +196,14 @@ int tifm_add_adapter(struct tifm_adapter *fm) { int rc; - if (!idr_pre_get(&tifm_adapter_idr, GFP_KERNEL)) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&tifm_adapter_lock); - rc = idr_get_new(&tifm_adapter_idr, fm, &fm->id); + rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT); + if (rc >= 0) + fm->id = rc; spin_unlock(&tifm_adapter_lock); - if (rc) + idr_preload_end(); + if (rc < 0) return rc; dev_set_name(&fm->dev, "tifm%u", fm->id); diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c index c3e8397f62ed..a8cee33ae8d2 100644 --- a/drivers/misc/vmw_vmci/vmci_doorbell.c +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -127,9 +127,8 @@ static struct dbell_entry *dbell_index_table_find(u32 idx) { u32 bucket = VMCI_DOORBELL_HASH(idx); struct dbell_entry *dbell; - struct hlist_node *node; - hlist_for_each_entry(dbell, node, &vmci_doorbell_it.entries[bucket], + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { if (idx == dbell->idx) return dbell; @@ -359,12 +358,10 @@ static void dbell_fire_entries(u32 notify_idx) { u32 bucket = VMCI_DOORBELL_HASH(notify_idx); struct dbell_entry *dbell; - struct hlist_node *node; spin_lock_bh(&vmci_doorbell_it.lock); - hlist_for_each_entry(dbell, node, - &vmci_doorbell_it.entries[bucket], node) { + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { if (dbell->idx == notify_idx && atomic_read(&dbell->active) == 1) { if (dbell->run_delayed) { diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index a196f84a4fd2..9a53a30de445 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -46,11 +46,10 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, enum vmci_resource_type type) { struct vmci_resource *r, *resource = NULL; - struct hlist_node *node; unsigned int idx = vmci_resource_hash(handle); rcu_read_lock(); - hlist_for_each_entry_rcu(r, node, + hlist_for_each_entry_rcu(r, &vmci_resource_table.entries[idx], node) { u32 cid = r->handle.context; u32 rid = r->handle.resource; @@ -146,12 +145,11 @@ void vmci_resource_remove(struct vmci_resource *resource) struct vmci_handle handle = resource->handle; unsigned int idx = vmci_resource_hash(handle); struct vmci_resource *r; - struct hlist_node *node; /* Remove resource from hash table. */ spin_lock(&vmci_resource_table.lock); - hlist_for_each_entry(r, node, &vmci_resource_table.entries[idx], node) { + hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) { if (vmci_handle_is_equal(r->handle, resource->handle)) { hlist_del_init_rcu(&r->node); break; diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 821cd8224137..2a3593d9f87d 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -429,19 +429,20 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) int err; struct mmc_host *host; - if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) - return NULL; - host = kzalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); if (!host) return NULL; /* scanning will be enabled when we're ready */ host->rescan_disable = 1; + idr_preload(GFP_KERNEL); spin_lock(&mmc_host_lock); - err = idr_get_new(&mmc_host_idr, host, &host->index); + err = idr_alloc(&mmc_host_idr, host, 0, 0, GFP_NOWAIT); + if (err >= 0) + host->index = err; spin_unlock(&mmc_host_lock); - if (err) + idr_preload_end(); + if (err < 0) goto free; dev_set_name(&host->class_dev, "mmc%d", host->index); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 60063ccb4c4b..98342213ed21 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1453,7 +1453,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host) if (!sg_miter_next(sg_miter)) goto done; - host->sg = sg_miter->__sg; + host->sg = sg_miter->piter.sg; buf = sg_miter->addr; remain = sg_miter->length; offset = 0; @@ -1508,7 +1508,7 @@ static void dw_mci_write_data_pio(struct dw_mci *host) if (!sg_miter_next(sg_miter)) goto done; - host->sg = sg_miter->__sg; + host->sg = sg_miter->piter.sg; buf = sg_miter->addr; remain = sg_miter->length; offset = 0; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index ec794a72975d..61d5f56473e1 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -349,13 +349,8 @@ int add_mtd_device(struct mtd_info *mtd) BUG_ON(mtd->writesize == 0); mutex_lock(&mtd_table_mutex); - do { - if (!idr_pre_get(&mtd_idr, GFP_KERNEL)) - goto fail_locked; - error = idr_get_new(&mtd_idr, mtd, &i); - } while (error == -EAGAIN); - - if (error) + i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); + if (i < 0) goto fail_locked; mtd->index = i; diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c index 1eee264509a8..db2f22e7966a 100644 --- a/drivers/mtd/tests/mtd_nandecctest.c +++ b/drivers/mtd/tests/mtd_nandecctest.c @@ -256,7 +256,7 @@ static int nand_ecc_test_run(const size_t size) goto error; } - get_random_bytes(correct_data, size); + prandom_bytes(correct_data, size); __nand_calculate_ecc(correct_data, size, correct_ecc); for (i = 0; i < ARRAY_SIZE(nand_ecc_test); i++) { diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c index e827fa8cd844..3e24b379ffa4 100644 --- a/drivers/mtd/tests/mtd_oobtest.c +++ b/drivers/mtd/tests/mtd_oobtest.c @@ -29,6 +29,7 @@ #include <linux/mtd/mtd.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/random.h> static int dev = -EINVAL; module_param(dev, int, S_IRUGO); @@ -46,26 +47,7 @@ static int use_offset; static int use_len; static int use_len_max; static int vary_offset; -static unsigned long next = 1; - -static inline unsigned int simple_rand(void) -{ - next = next * 1103515245 + 12345; - return (unsigned int)((next / 65536) % 32768); -} - -static inline void simple_srand(unsigned long seed) -{ - next = seed; -} - -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = simple_rand(); -} +static struct rnd_state rnd_state; static int erase_eraseblock(int ebnum) { @@ -129,7 +111,7 @@ static int write_eraseblock(int ebnum) loff_t addr = ebnum * mtd->erasesize; for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { - set_random_data(writebuf, use_len); + prandom_bytes_state(&rnd_state, writebuf, use_len); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -182,7 +164,7 @@ static int verify_eraseblock(int ebnum) loff_t addr = ebnum * mtd->erasesize; for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { - set_random_data(writebuf, use_len); + prandom_bytes_state(&rnd_state, writebuf, use_len); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -273,7 +255,7 @@ static int verify_eraseblock_in_one_go(int ebnum) loff_t addr = ebnum * mtd->erasesize; size_t len = mtd->ecclayout->oobavail * pgcnt; - set_random_data(writebuf, len); + prandom_bytes_state(&rnd_state, writebuf, len); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -424,12 +406,12 @@ static int __init mtd_oobtest_init(void) if (err) goto out; - simple_srand(1); + prandom_seed_state(&rnd_state, 1); err = write_whole_device(); if (err) goto out; - simple_srand(1); + prandom_seed_state(&rnd_state, 1); err = verify_all_eraseblocks(); if (err) goto out; @@ -444,13 +426,13 @@ static int __init mtd_oobtest_init(void) if (err) goto out; - simple_srand(3); + prandom_seed_state(&rnd_state, 3); err = write_whole_device(); if (err) goto out; /* Check all eraseblocks */ - simple_srand(3); + prandom_seed_state(&rnd_state, 3); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -479,7 +461,7 @@ static int __init mtd_oobtest_init(void) use_len = mtd->ecclayout->oobavail; use_len_max = mtd->ecclayout->oobavail; vary_offset = 1; - simple_srand(5); + prandom_seed_state(&rnd_state, 5); err = write_whole_device(); if (err) @@ -490,7 +472,7 @@ static int __init mtd_oobtest_init(void) use_len = mtd->ecclayout->oobavail; use_len_max = mtd->ecclayout->oobavail; vary_offset = 1; - simple_srand(5); + prandom_seed_state(&rnd_state, 5); err = verify_all_eraseblocks(); if (err) goto out; @@ -649,7 +631,7 @@ static int __init mtd_oobtest_init(void) goto out; /* Write all eraseblocks */ - simple_srand(11); + prandom_seed_state(&rnd_state, 11); pr_info("writing OOBs of whole device\n"); for (i = 0; i < ebcnt - 1; ++i) { int cnt = 2; @@ -659,7 +641,7 @@ static int __init mtd_oobtest_init(void) continue; addr = (i + 1) * mtd->erasesize - mtd->writesize; for (pg = 0; pg < cnt; ++pg) { - set_random_data(writebuf, sz); + prandom_bytes_state(&rnd_state, writebuf, sz); ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; @@ -680,12 +662,13 @@ static int __init mtd_oobtest_init(void) pr_info("written %u eraseblocks\n", i); /* Check all eraseblocks */ - simple_srand(11); + prandom_seed_state(&rnd_state, 11); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt - 1; ++i) { if (bbt[i] || bbt[i + 1]) continue; - set_random_data(writebuf, mtd->ecclayout->oobavail * 2); + prandom_bytes_state(&rnd_state, writebuf, + mtd->ecclayout->oobavail * 2); addr = (i + 1) * mtd->erasesize - mtd->writesize; ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c index f93a76f88113..0c1140b6c286 100644 --- a/drivers/mtd/tests/mtd_pagetest.c +++ b/drivers/mtd/tests/mtd_pagetest.c @@ -29,6 +29,7 @@ #include <linux/mtd/mtd.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/random.h> static int dev = -EINVAL; module_param(dev, int, S_IRUGO); @@ -45,26 +46,7 @@ static int bufsize; static int ebcnt; static int pgcnt; static int errcnt; -static unsigned long next = 1; - -static inline unsigned int simple_rand(void) -{ - next = next * 1103515245 + 12345; - return (unsigned int)((next / 65536) % 32768); -} - -static inline void simple_srand(unsigned long seed) -{ - next = seed; -} - -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = simple_rand(); -} +static struct rnd_state rnd_state; static int erase_eraseblock(int ebnum) { @@ -98,7 +80,7 @@ static int write_eraseblock(int ebnum) size_t written; loff_t addr = ebnum * mtd->erasesize; - set_random_data(writebuf, mtd->erasesize); + prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize); cond_resched(); err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf); if (err || written != mtd->erasesize) @@ -124,7 +106,7 @@ static int verify_eraseblock(int ebnum) for (i = 0; i < ebcnt && bbt[ebcnt - i - 1]; ++i) addrn -= mtd->erasesize; - set_random_data(writebuf, mtd->erasesize); + prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize); for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) { /* Do a read to set the internal dataRAMs to different data */ err = mtd_read(mtd, addr0, bufsize, &read, twopages); @@ -160,7 +142,8 @@ static int verify_eraseblock(int ebnum) } /* Check boundary between eraseblocks */ if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) { - unsigned long oldnext = next; + struct rnd_state old_state = rnd_state; + /* Do a read to set the internal dataRAMs to different data */ err = mtd_read(mtd, addr0, bufsize, &read, twopages); if (mtd_is_bitflip(err)) @@ -188,13 +171,13 @@ static int verify_eraseblock(int ebnum) return err; } memcpy(boundary, writebuf + mtd->erasesize - pgsize, pgsize); - set_random_data(boundary + pgsize, pgsize); + prandom_bytes_state(&rnd_state, boundary + pgsize, pgsize); if (memcmp(twopages, boundary, bufsize)) { pr_err("error: verify failed at %#llx\n", (long long)addr); errcnt += 1; } - next = oldnext; + rnd_state = old_state; } return err; } @@ -326,7 +309,7 @@ static int erasecrosstest(void) return err; pr_info("writing 1st page of block %d\n", ebnum); - set_random_data(writebuf, pgsize); + prandom_bytes_state(&rnd_state, writebuf, pgsize); strcpy(writebuf, "There is no data like this!"); err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { @@ -359,7 +342,7 @@ static int erasecrosstest(void) return err; pr_info("writing 1st page of block %d\n", ebnum); - set_random_data(writebuf, pgsize); + prandom_bytes_state(&rnd_state, writebuf, pgsize); strcpy(writebuf, "There is no data like this!"); err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { @@ -417,7 +400,7 @@ static int erasetest(void) return err; pr_info("writing 1st page of block %d\n", ebnum); - set_random_data(writebuf, pgsize); + prandom_bytes_state(&rnd_state, writebuf, pgsize); err = mtd_write(mtd, addr0, pgsize, &written, writebuf); if (err || written != pgsize) { pr_err("error: write failed at %#llx\n", @@ -565,7 +548,7 @@ static int __init mtd_pagetest_init(void) pr_info("erased %u eraseblocks\n", i); /* Write all eraseblocks */ - simple_srand(1); + prandom_seed_state(&rnd_state, 1); pr_info("writing whole device\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -580,7 +563,7 @@ static int __init mtd_pagetest_init(void) pr_info("written %u eraseblocks\n", i); /* Check all eraseblocks */ - simple_srand(1); + prandom_seed_state(&rnd_state, 1); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c index 596cbea8df4c..a6ce9c1fa6c5 100644 --- a/drivers/mtd/tests/mtd_speedtest.c +++ b/drivers/mtd/tests/mtd_speedtest.c @@ -49,13 +49,6 @@ static int pgcnt; static int goodebcnt; static struct timeval start, finish; -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = random32(); -} static int erase_eraseblock(int ebnum) { @@ -396,7 +389,7 @@ static int __init mtd_speedtest_init(void) goto out; } - set_random_data(iobuf, mtd->erasesize); + prandom_bytes(iobuf, mtd->erasesize); err = scan_for_bad_eraseblocks(); if (err) diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index 3729f679ae5d..2d7e6cffd6d4 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -282,8 +282,7 @@ static int __init mtd_stresstest_init(void) } for (i = 0; i < ebcnt; i++) offsets[i] = mtd->erasesize; - for (i = 0; i < bufsize; i++) - writebuf[i] = random32(); + prandom_bytes(writebuf, bufsize); err = scan_for_bad_eraseblocks(); if (err) diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c index c880c2229c59..aade56f27945 100644 --- a/drivers/mtd/tests/mtd_subpagetest.c +++ b/drivers/mtd/tests/mtd_subpagetest.c @@ -28,6 +28,7 @@ #include <linux/mtd/mtd.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/random.h> static int dev = -EINVAL; module_param(dev, int, S_IRUGO); @@ -43,26 +44,7 @@ static int bufsize; static int ebcnt; static int pgcnt; static int errcnt; -static unsigned long next = 1; - -static inline unsigned int simple_rand(void) -{ - next = next * 1103515245 + 12345; - return (unsigned int)((next / 65536) % 32768); -} - -static inline void simple_srand(unsigned long seed) -{ - next = seed; -} - -static void set_random_data(unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - buf[i] = simple_rand(); -} +static struct rnd_state rnd_state; static inline void clear_data(unsigned char *buf, size_t len) { @@ -119,7 +101,7 @@ static int write_eraseblock(int ebnum) int err = 0; loff_t addr = ebnum * mtd->erasesize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); err = mtd_write(mtd, addr, subpgsize, &written, writebuf); if (unlikely(err || written != subpgsize)) { pr_err("error: write failed at %#llx\n", @@ -133,7 +115,7 @@ static int write_eraseblock(int ebnum) addr += subpgsize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); err = mtd_write(mtd, addr, subpgsize, &written, writebuf); if (unlikely(err || written != subpgsize)) { pr_err("error: write failed at %#llx\n", @@ -157,7 +139,7 @@ static int write_eraseblock2(int ebnum) for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) break; - set_random_data(writebuf, subpgsize * k); + prandom_bytes_state(&rnd_state, writebuf, subpgsize * k); err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf); if (unlikely(err || written != subpgsize * k)) { pr_err("error: write failed at %#llx\n", @@ -193,7 +175,7 @@ static int verify_eraseblock(int ebnum) int err = 0; loff_t addr = ebnum * mtd->erasesize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); clear_data(readbuf, subpgsize); err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { @@ -220,7 +202,7 @@ static int verify_eraseblock(int ebnum) addr += subpgsize; - set_random_data(writebuf, subpgsize); + prandom_bytes_state(&rnd_state, writebuf, subpgsize); clear_data(readbuf, subpgsize); err = mtd_read(mtd, addr, subpgsize, &read, readbuf); if (unlikely(err || read != subpgsize)) { @@ -257,7 +239,7 @@ static int verify_eraseblock2(int ebnum) for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) break; - set_random_data(writebuf, subpgsize * k); + prandom_bytes_state(&rnd_state, writebuf, subpgsize * k); clear_data(readbuf, subpgsize * k); err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf); if (unlikely(err || read != subpgsize * k)) { @@ -430,7 +412,7 @@ static int __init mtd_subpagetest_init(void) goto out; pr_info("writing whole device\n"); - simple_srand(1); + prandom_seed_state(&rnd_state, 1); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) continue; @@ -443,7 +425,7 @@ static int __init mtd_subpagetest_init(void) } pr_info("written %u eraseblocks\n", i); - simple_srand(1); + prandom_seed_state(&rnd_state, 1); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -466,7 +448,7 @@ static int __init mtd_subpagetest_init(void) goto out; /* Write all eraseblocks */ - simple_srand(3); + prandom_seed_state(&rnd_state, 3); pr_info("writing whole device\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) @@ -481,7 +463,7 @@ static int __init mtd_subpagetest_init(void) pr_info("written %u eraseblocks\n", i); /* Check all eraseblocks */ - simple_srand(3); + prandom_seed_state(&rnd_state, 3); pr_info("verifying all eraseblocks\n"); for (i = 0; i < ebcnt; ++i) { if (bbt[i]) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f4d2e9e3c6d5..c3f1afd86906 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2197,13 +2197,13 @@ static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter, union ixgbe_atr_input *mask = &adapter->fdir_mask; struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *rule = NULL; /* report total rule count */ cmd->data = (1024 << adapter->fdir_pballoc) - 2; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { if (fsp->location <= rule->sw_idx) break; @@ -2264,14 +2264,14 @@ static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *rule; int cnt = 0; /* report total rule count */ cmd->data = (1024 << adapter->fdir_pballoc) - 2; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { if (cnt == cmd->rule_cnt) return -EMSGSIZE; @@ -2358,19 +2358,19 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, u16 sw_idx) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2, *parent; - struct ixgbe_fdir_filter *rule; + struct hlist_node *node2; + struct ixgbe_fdir_filter *rule, *parent; int err = -EINVAL; parent = NULL; rule = NULL; - hlist_for_each_entry_safe(rule, node, node2, + hlist_for_each_entry_safe(rule, node2, &adapter->fdir_filter_list, fdir_node) { /* hash found, or no matching entry */ if (rule->sw_idx >= sw_idx) break; - parent = node; + parent = rule; } /* if there is an old rule occupying our place remove it */ @@ -2399,7 +2399,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_after(parent, &input->fdir_node); + hlist_add_after(&parent->fdir_node, &input->fdir_node); else hlist_add_head(&input->fdir_node, &adapter->fdir_filter_list); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 68478d6dfa2d..db5611ae407e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3891,7 +3891,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); @@ -3899,7 +3899,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) if (!hlist_empty(&adapter->fdir_filter_list)) ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { ixgbe_fdir_write_perfect_filter_82599(hw, &filter->filter, @@ -4356,12 +4356,12 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { hlist_del(&filter->fdir_node); kfree(filter); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 5385474bb526..bb4d8d99f36d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -225,11 +225,10 @@ static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { - struct hlist_node *elem; struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; - hlist_for_each_entry(filter, elem, + hlist_for_each_entry(filter, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { @@ -574,13 +573,13 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv) if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct hlist_head *bucket; unsigned int mac_hash; mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, priv->dev->dev_addr)) { en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n", @@ -609,11 +608,11 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, struct hlist_head *bucket; unsigned int mac_hash; struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; u64 prev_mac_u64 = mlx4_en_mac_to_u64(prev_mac); bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, prev_mac)) { mlx4_en_uc_steer_release(priv, entry->mac, qpn, entry->reg_id); @@ -1019,7 +1018,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, { struct netdev_hw_addr *ha; struct mlx4_mac_entry *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; bool found; u64 mac; int err = 0; @@ -1035,7 +1034,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, /* find what to remove */ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { bucket = &priv->mac_hash[i]; - hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + hlist_for_each_entry_safe(entry, tmp, bucket, hlist) { found = false; netdev_for_each_uc_addr(ha, dev) { if (ether_addr_equal_64bits(entry->mac, @@ -1078,7 +1077,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv, netdev_for_each_uc_addr(ha, dev) { found = false; bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]]; - hlist_for_each_entry(entry, n, bucket, hlist) { + hlist_for_each_entry(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, ha->addr)) { found = true; break; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ce38654bbdd0..c7f856308e1a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/mlx4/qp.h> #include <linux/skbuff.h> +#include <linux/rculist.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/vmalloc.h> @@ -617,7 +618,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; - struct hlist_node *n; struct hlist_head *bucket; unsigned int mac_hash; @@ -625,7 +625,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; rcu_read_lock(); - hlist_for_each_entry_rcu(entry, n, bucket, hlist) { + hlist_for_each_entry_rcu(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, ethh->h_source)) { rcu_read_unlock(); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 325e11e1ce0f..f89cc7a3fe6c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -576,7 +576,7 @@ void qlcnic_free_mac_list(struct qlcnic_adapter *adapter) void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) { struct qlcnic_filter *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; int i; unsigned long time; @@ -584,7 +584,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->fhash.fbucket_size; i++) { head = &(adapter->fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { cmd = tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL; time = tmp_fil->ftime; @@ -604,7 +604,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) for (i = 0; i < adapter->rx_fhash.fbucket_size; i++) { head = &(adapter->rx_fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { time = tmp_fil->ftime; if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) { @@ -621,14 +621,14 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter) void qlcnic_delete_lb_filters(struct qlcnic_adapter *adapter) { struct qlcnic_filter *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; int i; u8 cmd; for (i = 0; i < adapter->fhash.fbucket_size; i++) { head = &(adapter->fhash.fhead[i]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { cmd = tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL; qlcnic_sre_macaddr_change(adapter, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 6387e0cc3ea9..0e630061bff3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -162,7 +162,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, { struct ethhdr *phdr = (struct ethhdr *)(skb->data); struct qlcnic_filter *fil, *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; unsigned long time; u64 src_addr = 0; @@ -179,7 +179,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, (adapter->fhash.fbucket_size - 1); head = &(adapter->rx_fhash.fhead[hindex]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { time = tmp_fil->ftime; @@ -205,7 +205,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb, (adapter->fhash.fbucket_size - 1); head = &(adapter->rx_fhash.fhead[hindex]); spin_lock(&adapter->rx_mac_learn_lock); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { found = 1; @@ -272,7 +272,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb) { struct qlcnic_filter *fil, *tmp_fil; - struct hlist_node *tmp_hnode, *n; + struct hlist_node *n; struct hlist_head *head; struct net_device *netdev = adapter->netdev; struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -294,7 +294,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, hindex = qlcnic_mac_hash(src_addr) & (adapter->fhash.fbucket_size - 1); head = &(adapter->fhash.fhead[hindex]); - hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) { + hlist_for_each_entry_safe(tmp_fil, n, head, fnode) { if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) && tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 289b4eefb42f..1df0ff3839e8 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -614,10 +614,9 @@ struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb) { unsigned int hash = vnet_hashfn(skb->data); struct hlist_head *hp = &vp->port_hash[hash]; - struct hlist_node *n; struct vnet_port *port; - hlist_for_each_entry(port, n, hp, hash) { + hlist_for_each_entry(port, hp, hash) { if (ether_addr_equal(port->raddr, skb->data)) return port; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index defcd8a85744..417b2af1aa80 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -55,9 +55,8 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { struct macvlan_dev *vlan; - struct hlist_node *n; - hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) { + hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) { if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr)) return vlan; } @@ -149,7 +148,6 @@ static void macvlan_broadcast(struct sk_buff *skb, { const struct ethhdr *eth = eth_hdr(skb); const struct macvlan_dev *vlan; - struct hlist_node *n; struct sk_buff *nskb; unsigned int i; int err; @@ -159,7 +157,7 @@ static void macvlan_broadcast(struct sk_buff *skb, return; for (i = 0; i < MACVLAN_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) { + hlist_for_each_entry_rcu(vlan, &port->vlan_hash[i], hlist) { if (vlan->dev == src || !(vlan->mode & mode)) continue; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 97243011d319..a449439bd653 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -279,28 +279,17 @@ static int macvtap_receive(struct sk_buff *skb) static int macvtap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - int id; mutex_lock(&minor_lock); - if (idr_pre_get(&minor_idr, GFP_KERNEL) == 0) - goto exit; - - retval = idr_get_new_above(&minor_idr, vlan, 1, &id); - if (retval < 0) { - if (retval == -EAGAIN) - retval = -ENOMEM; - goto exit; - } - if (id < MACVTAP_NUM_DEVS) { - vlan->minor = id; - } else { + retval = idr_alloc(&minor_idr, vlan, 1, MACVTAP_NUM_DEVS, GFP_KERNEL); + if (retval >= 0) { + vlan->minor = retval; + } else if (retval == -ENOSPC) { printk(KERN_ERR "too many macvtap devices\n"); retval = -EINVAL; - idr_remove(&minor_idr, id); } -exit: mutex_unlock(&minor_lock); - return retval; + return retval < 0 ? retval : 0; } static void macvtap_free_minor(struct macvlan_dev *vlan) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 3db9131e9229..72ff14b811c6 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2953,46 +2953,21 @@ static void __exit ppp_cleanup(void) * by holding all_ppp_mutex */ -static int __unit_alloc(struct idr *p, void *ptr, int n) -{ - int unit, err; - -again: - if (!idr_pre_get(p, GFP_KERNEL)) { - pr_err("PPP: No free memory for idr\n"); - return -ENOMEM; - } - - err = idr_get_new_above(p, ptr, n, &unit); - if (err < 0) { - if (err == -EAGAIN) - goto again; - return err; - } - - return unit; -} - /* associate pointer with specified number */ static int unit_set(struct idr *p, void *ptr, int n) { int unit; - unit = __unit_alloc(p, ptr, n); - if (unit < 0) - return unit; - else if (unit != n) { - idr_remove(p, unit); - return -EINVAL; - } - + unit = idr_alloc(p, ptr, n, n + 1, GFP_KERNEL); + if (unit == -ENOSPC) + unit = -EINVAL; return unit; } /* get new free unit number and associate pointer with it */ static int unit_get(struct idr *p, void *ptr) { - return __unit_alloc(p, ptr, 0); + return idr_alloc(p, ptr, 0, 0, GFP_KERNEL); } /* put unit number back to a pool */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b6f45c5d84d5..2c6a22e278ea 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -197,9 +197,8 @@ static inline u32 tun_hashfn(u32 rxhash) static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) { struct tun_flow_entry *e; - struct hlist_node *n; - hlist_for_each_entry_rcu(e, n, head, hash_link) { + hlist_for_each_entry_rcu(e, head, hash_link) { if (e->rxhash == rxhash) return e; } @@ -241,9 +240,9 @@ static void tun_flow_flush(struct tun_struct *tun) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) tun_flow_delete(tun, e); } spin_unlock_bh(&tun->lock); @@ -256,9 +255,9 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { if (e->queue_index == queue_index) tun_flow_delete(tun, e); } @@ -279,9 +278,9 @@ static void tun_flow_cleanup(unsigned long data) spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { unsigned long this_timer; count++; this_timer = e->updated + delay; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f736823f8437..f10e58ac9c1b 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -145,9 +145,8 @@ static inline struct hlist_head *vni_head(struct net *net, u32 id) static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id) { struct vxlan_dev *vxlan; - struct hlist_node *node; - hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) { + hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) { if (vxlan->vni == id) return vxlan; } @@ -292,9 +291,8 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, { struct hlist_head *head = vxlan_fdb_head(vxlan, mac); struct vxlan_fdb *f; - struct hlist_node *node; - hlist_for_each_entry_rcu(f, node, head, hlist) { + hlist_for_each_entry_rcu(f, head, hlist) { if (compare_ether_addr(mac, f->eth_addr) == 0) return f; } @@ -422,10 +420,9 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - struct hlist_node *n; int err; - hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) { + hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { if (idx < cb->args[0]) goto skip; @@ -483,11 +480,10 @@ static bool vxlan_group_used(struct vxlan_net *vn, const struct vxlan_dev *this) { const struct vxlan_dev *vxlan; - struct hlist_node *node; unsigned h; for (h = 0; h < VNI_HASH_SIZE; ++h) - hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) { + hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) { if (vxlan == this) continue; diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c index 48273dd05b63..4941f201d6c8 100644 --- a/drivers/net/wireless/zd1201.c +++ b/drivers/net/wireless/zd1201.c @@ -309,7 +309,6 @@ static void zd1201_usbrx(struct urb *urb) if (data[urb->actual_length-1] == ZD1201_PACKET_RXDATA) { int datalen = urb->actual_length-1; unsigned short len, fc, seq; - struct hlist_node *node; len = ntohs(*(__be16 *)&data[datalen-2]); if (len>datalen) @@ -362,7 +361,7 @@ static void zd1201_usbrx(struct urb *urb) hlist_add_head(&frag->fnode, &zd->fraglist); goto resubmit; } - hlist_for_each_entry(frag, node, &zd->fraglist, fnode) + hlist_for_each_entry(frag, &zd->fraglist, fnode) if (frag->seq == (seq&IEEE80211_SCTL_SEQ)) break; if (!frag) @@ -1831,14 +1830,14 @@ err_zd: static void zd1201_disconnect(struct usb_interface *interface) { struct zd1201 *zd = usb_get_intfdata(interface); - struct hlist_node *node, *node2; + struct hlist_node *node2; struct zd1201_frag *frag; if (!zd) return; usb_set_intfdata(interface, NULL); - hlist_for_each_entry_safe(frag, node, node2, &zd->fraglist, fnode) { + hlist_for_each_entry_safe(frag, node2, &zd->fraglist, fnode) { hlist_del_init(&frag->fnode); kfree_skb(frag->skb); kfree(frag); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 924e4665bd57..b099e0025d2b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -842,9 +842,8 @@ static struct pci_cap_saved_state *pci_find_saved_cap( struct pci_dev *pci_dev, char cap) { struct pci_cap_saved_state *tmp; - struct hlist_node *pos; - hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { + hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) { if (tmp->cap.cap_nr == cap) return tmp; } @@ -1041,7 +1040,6 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) struct pci_saved_state *state; struct pci_cap_saved_state *tmp; struct pci_cap_saved_data *cap; - struct hlist_node *pos; size_t size; if (!dev->state_saved) @@ -1049,7 +1047,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) size = sizeof(*state) + sizeof(struct pci_cap_saved_data); - hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + hlist_for_each_entry(tmp, &dev->saved_cap_space, next) size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; state = kzalloc(size, GFP_KERNEL); @@ -1060,7 +1058,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) sizeof(state->config_space)); cap = state->cap; - hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + hlist_for_each_entry(tmp, &dev->saved_cap_space, next) { size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; memcpy(cap, &tmp->cap, len); cap = (struct pci_cap_saved_data *)((u8 *)cap + len); @@ -2038,9 +2036,9 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) void pci_free_cap_save_buffers(struct pci_dev *dev) { struct pci_cap_saved_state *tmp; - struct hlist_node *pos, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(tmp, pos, n, &dev->saved_cap_space, next) + hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next) kfree(tmp); } diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c index ca91396fc48e..0727f9256138 100644 --- a/drivers/power/bq2415x_charger.c +++ b/drivers/power/bq2415x_charger.c @@ -1515,16 +1515,11 @@ static int bq2415x_probe(struct i2c_client *client, } /* Get new ID for the new device */ - ret = idr_pre_get(&bq2415x_id, GFP_KERNEL); - if (ret == 0) - return -ENOMEM; - mutex_lock(&bq2415x_id_mutex); - ret = idr_get_new(&bq2415x_id, client, &num); + num = idr_alloc(&bq2415x_id, client, 0, 0, GFP_KERNEL); mutex_unlock(&bq2415x_id_mutex); - - if (ret < 0) - return ret; + if (num < 0) + return num; name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num); if (!name) { diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c index 8ccf5d7d0add..26037ca7efb4 100644 --- a/drivers/power/bq27x00_battery.c +++ b/drivers/power/bq27x00_battery.c @@ -791,14 +791,11 @@ static int bq27x00_battery_probe(struct i2c_client *client, int retval = 0; /* Get new ID for the new battery device */ - retval = idr_pre_get(&battery_id, GFP_KERNEL); - if (retval == 0) - return -ENOMEM; mutex_lock(&battery_mutex); - retval = idr_get_new(&battery_id, client, &num); + num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL); mutex_unlock(&battery_mutex); - if (retval < 0) - return retval; + if (num < 0) + return num; name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num); if (!name) { diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c index e7301b3ed623..c09e7726c96c 100644 --- a/drivers/power/ds2782_battery.c +++ b/drivers/power/ds2782_battery.c @@ -395,17 +395,12 @@ static int ds278x_battery_probe(struct i2c_client *client, } /* Get an ID for this battery */ - ret = idr_pre_get(&battery_id, GFP_KERNEL); - if (ret == 0) { - ret = -ENOMEM; - goto fail_id; - } - mutex_lock(&battery_lock); - ret = idr_get_new(&battery_id, client, &num); + ret = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL); mutex_unlock(&battery_lock); if (ret < 0) goto fail_id; + num = ret; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { diff --git a/drivers/pps/clients/pps-gpio.c b/drivers/pps/clients/pps-gpio.c index 2bf0c1b608dd..d3db26e46489 100644 --- a/drivers/pps/clients/pps-gpio.c +++ b/drivers/pps/clients/pps-gpio.c @@ -128,7 +128,8 @@ static int pps_gpio_probe(struct platform_device *pdev) } /* allocate space for device info */ - data = kzalloc(sizeof(struct pps_gpio_device_data), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, sizeof(struct pps_gpio_device_data), + GFP_KERNEL); if (data == NULL) { err = -ENOMEM; goto return_error; @@ -150,7 +151,6 @@ static int pps_gpio_probe(struct platform_device *pdev) pps_default_params |= PPS_CAPTURECLEAR | PPS_OFFSETCLEAR; data->pps = pps_register_source(&data->info, pps_default_params); if (data->pps == NULL) { - kfree(data); pr_err("failed to register IRQ %d as PPS source\n", irq); err = -EINVAL; goto return_error; @@ -164,7 +164,6 @@ static int pps_gpio_probe(struct platform_device *pdev) get_irqf_trigger_flags(pdata), data->info.name, data); if (ret) { pps_unregister_source(data->pps); - kfree(data); pr_err("failed to acquire IRQ %d\n", irq); err = -EINVAL; goto return_error; @@ -190,7 +189,6 @@ static int pps_gpio_remove(struct platform_device *pdev) gpio_free(pdata->gpio_pin); pps_unregister_source(data->pps); pr_info("removed IRQ %d as PPS source\n", data->irq); - kfree(data); return 0; } diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c index f197e8ea185c..cdad4d95b20e 100644 --- a/drivers/pps/kapi.c +++ b/drivers/pps/kapi.c @@ -102,7 +102,7 @@ struct pps_device *pps_register_source(struct pps_source_info *info, goto pps_register_source_exit; } - /* These initializations must be done before calling idr_get_new() + /* These initializations must be done before calling idr_alloc() * in order to avoid reces into pps_event(). */ pps->params.api_version = PPS_API_VERS; diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c index 6437703eb10f..7173e3ad475d 100644 --- a/drivers/pps/pps.c +++ b/drivers/pps/pps.c @@ -295,29 +295,21 @@ int pps_register_cdev(struct pps_device *pps) dev_t devt; mutex_lock(&pps_idr_lock); - /* Get new ID for the new PPS source */ - if (idr_pre_get(&pps_idr, GFP_KERNEL) == 0) { - mutex_unlock(&pps_idr_lock); - return -ENOMEM; - } - - /* Now really allocate the PPS source. - * After idr_get_new() calling the new source will be freely available - * into the kernel. + /* + * Get new ID for the new PPS source. After idr_alloc() calling + * the new source will be freely available into the kernel. */ - err = idr_get_new(&pps_idr, pps, &pps->id); - mutex_unlock(&pps_idr_lock); - - if (err < 0) - return err; - - pps->id &= MAX_IDR_MASK; - if (pps->id >= PPS_MAX_SOURCES) { - pr_err("%s: too many PPS sources in the system\n", - pps->info.name); - err = -EBUSY; - goto free_idr; + err = idr_alloc(&pps_idr, pps, 0, PPS_MAX_SOURCES, GFP_KERNEL); + if (err < 0) { + if (err == -ENOSPC) { + pr_err("%s: too many PPS sources in the system\n", + pps->info.name); + err = -EBUSY; + } + goto out_unlock; } + pps->id = err; + mutex_unlock(&pps_idr_lock); devt = MKDEV(MAJOR(pps_devt), pps->id); @@ -351,8 +343,8 @@ del_cdev: free_idr: mutex_lock(&pps_idr_lock); idr_remove(&pps_idr, pps->id); +out_unlock: mutex_unlock(&pps_idr_lock); - return err; } diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index dd3bfaf1ad40..29387df4bfc9 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -199,11 +199,6 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) /* actual size of vring (in bytes) */ size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); - if (!idr_pre_get(&rproc->notifyids, GFP_KERNEL)) { - dev_err(dev, "idr_pre_get failed\n"); - return -ENOMEM; - } - /* * Allocate non-cacheable memory for the vring. In the future * this call will also configure the IOMMU for us @@ -221,12 +216,13 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) * TODO: let the rproc know the notifyid of this vring * TODO: support predefined notifyids (via resource table) */ - ret = idr_get_new(&rproc->notifyids, rvring, ¬ifyid); + ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL); if (ret) { - dev_err(dev, "idr_get_new failed: %d\n", ret); + dev_err(dev, "idr_alloc failed: %d\n", ret); dma_free_coherent(dev->parent, size, va, dma); return ret; } + notifyid = ret; /* Store largest notifyid */ rproc->max_notifyid = max(rproc->max_notifyid, notifyid); @@ -1180,7 +1176,6 @@ static void rproc_type_release(struct device *dev) rproc_delete_debug_dir(rproc); - idr_remove_all(&rproc->notifyids); idr_destroy(&rproc->notifyids); if (rproc->index >= 0) diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index d85446021ddb..a59684b5fc68 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -213,13 +213,10 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, struct rpmsg_channel *rpdev, rpmsg_rx_cb_t cb, void *priv, u32 addr) { - int err, tmpaddr, request; + int id_min, id_max, id; struct rpmsg_endpoint *ept; struct device *dev = rpdev ? &rpdev->dev : &vrp->vdev->dev; - if (!idr_pre_get(&vrp->endpoints, GFP_KERNEL)) - return NULL; - ept = kzalloc(sizeof(*ept), GFP_KERNEL); if (!ept) { dev_err(dev, "failed to kzalloc a new ept\n"); @@ -234,31 +231,28 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, ept->priv = priv; /* do we need to allocate a local address ? */ - request = addr == RPMSG_ADDR_ANY ? RPMSG_RESERVED_ADDRESSES : addr; + if (addr == RPMSG_ADDR_ANY) { + id_min = RPMSG_RESERVED_ADDRESSES; + id_max = 0; + } else { + id_min = addr; + id_max = addr + 1; + } mutex_lock(&vrp->endpoints_lock); /* bind the endpoint to an rpmsg address (and allocate one if needed) */ - err = idr_get_new_above(&vrp->endpoints, ept, request, &tmpaddr); - if (err) { - dev_err(dev, "idr_get_new_above failed: %d\n", err); + id = idr_alloc(&vrp->endpoints, ept, id_min, id_max, GFP_KERNEL); + if (id < 0) { + dev_err(dev, "idr_alloc failed: %d\n", id); goto free_ept; } - - /* make sure the user's address request is fulfilled, if relevant */ - if (addr != RPMSG_ADDR_ANY && tmpaddr != addr) { - dev_err(dev, "address 0x%x already in use\n", addr); - goto rem_idr; - } - - ept->addr = tmpaddr; + ept->addr = id; mutex_unlock(&vrp->endpoints_lock); return ept; -rem_idr: - idr_remove(&vrp->endpoints, request); free_ept: mutex_unlock(&vrp->endpoints_lock); kref_put(&ept->refcount, __ept_release); @@ -1036,7 +1030,6 @@ static void rpmsg_remove(struct virtio_device *vdev) if (vrp->ns_ept) __rpmsg_destroy_ept(vrp, vrp->ns_ept); - idr_remove_all(&vrp->endpoints); idr_destroy(&vrp->endpoints); vdev->config->del_vqs(vrp->vdev); diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 742f5d7eb0f5..a6f7190c09a4 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -12,13 +12,13 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 29801 +# define AAC_DRIVER_BUILD 30000 # define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 #define AAC_NUM_MGT_FIB 8 -#define AAC_NUM_IO_FIB (512 - AAC_NUM_MGT_FIB) +#define AAC_NUM_IO_FIB (1024 - AAC_NUM_MGT_FIB) #define AAC_NUM_FIB (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB) #define AAC_MAX_LUN (8) @@ -36,6 +36,10 @@ #define CONTAINER_TO_ID(cont) (cont) #define CONTAINER_TO_LUN(cont) (0) +#define PMC_DEVICE_S7 0x28c +#define PMC_DEVICE_S8 0x28d +#define PMC_DEVICE_S9 0x28f + #define aac_phys_to_logical(x) ((x)+1) #define aac_logical_to_phys(x) ((x)?(x)-1:0) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 8e5d3be16127..3f759957f4b4 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) dev->max_fib_size = status[1] & 0xFFE0; host->sg_tablesize = status[2] >> 16; dev->sg_tablesize = status[2] & 0xFFFF; - host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; + if (dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) + host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) : + (status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB; + else + host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; dev->max_num_aif = status[4] & 0xFFFF; /* * NOTE: @@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) } } + if (host->can_queue > AAC_NUM_IO_FIB) + host->can_queue = AAC_NUM_IO_FIB; + /* * Ok now init the communication subsystem */ diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index e6bf12675db8..a5f7690e819e 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) { sizeof(driver_info.host_os_patch) - 1); strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name - 1)); + sizeof(driver_info.os_device_name) - 1); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c index 8f92732655c7..5864f987f206 100644 --- a/drivers/scsi/bfa/bfad_im.c +++ b/drivers/scsi/bfa/bfad_im.c @@ -523,20 +523,13 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port, int error = 1; mutex_lock(&bfad_mutex); - if (!idr_pre_get(&bfad_im_port_index, GFP_KERNEL)) { + error = idr_alloc(&bfad_im_port_index, im_port, 0, 0, GFP_KERNEL); + if (error < 0) { mutex_unlock(&bfad_mutex); - printk(KERN_WARNING "idr_pre_get failure\n"); + printk(KERN_WARNING "idr_alloc failure\n"); goto out; } - - error = idr_get_new(&bfad_im_port_index, im_port, - &im_port->idr_id); - if (error) { - mutex_unlock(&bfad_mutex); - printk(KERN_WARNING "idr_get_new failure\n"); - goto out; - } - + im_port->idr_id = error; mutex_unlock(&bfad_mutex); im_port->shost = bfad_scsi_host_alloc(im_port, bfad); diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 3486845ba301..50fcd018d14b 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -64,7 +64,7 @@ #include "bnx2fc_constants.h" #define BNX2FC_NAME "bnx2fc" -#define BNX2FC_VERSION "1.0.12" +#define BNX2FC_VERSION "1.0.13" #define PFX "bnx2fc: " @@ -156,6 +156,18 @@ #define BNX2FC_RELOGIN_WAIT_TIME 200 #define BNX2FC_RELOGIN_WAIT_CNT 10 +#define BNX2FC_STATS(hba, stat, cnt) \ + do { \ + u32 val; \ + \ + val = fw_stats->stat.cnt; \ + if (hba->prev_stats.stat.cnt <= val) \ + val -= hba->prev_stats.stat.cnt; \ + else \ + val += (0xfffffff - hba->prev_stats.stat.cnt); \ + hba->bfw_stats.cnt += val; \ + } while (0) + /* bnx2fc driver uses only one instance of fcoe_percpu_s */ extern struct fcoe_percpu_s bnx2fc_global; @@ -167,6 +179,14 @@ struct bnx2fc_percpu_s { spinlock_t fp_work_lock; }; +struct bnx2fc_fw_stats { + u64 fc_crc_cnt; + u64 fcoe_tx_pkt_cnt; + u64 fcoe_rx_pkt_cnt; + u64 fcoe_tx_byte_cnt; + u64 fcoe_rx_byte_cnt; +}; + struct bnx2fc_hba { struct list_head list; struct cnic_dev *cnic; @@ -207,6 +227,8 @@ struct bnx2fc_hba { struct bnx2fc_rport **tgt_ofld_list; /* statistics */ + struct bnx2fc_fw_stats bfw_stats; + struct fcoe_statistics_params prev_stats; struct fcoe_statistics_params *stats_buffer; dma_addr_t stats_buf_dma; struct completion stat_req_done; @@ -280,6 +302,7 @@ struct bnx2fc_rport { #define BNX2FC_FLAG_UPLD_REQ_COMPL 0x7 #define BNX2FC_FLAG_EXPL_LOGO 0x8 #define BNX2FC_FLAG_DISABLE_FAILED 0x9 +#define BNX2FC_FLAG_ENABLED 0xa u8 src_addr[ETH_ALEN]; u32 max_sqes; @@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba); int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba); int bnx2fc_send_session_ofld_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); +int bnx2fc_send_session_enable_req(struct fcoe_port *port, + struct bnx2fc_rport *tgt); int bnx2fc_send_session_disable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba, diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 70ecd953a579..6401db494ef5 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu); #define DRV_MODULE_NAME "bnx2fc" #define DRV_MODULE_VERSION BNX2FC_VERSION -#define DRV_MODULE_RELDATE "Jun 04, 2012" +#define DRV_MODULE_RELDATE "Dec 21, 2012" static char version[] = @@ -687,11 +687,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); return bnx2fc_stats; } - bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt; - bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt; - bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4; - bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt; - bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4; + BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); + bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt); + bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt); + bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4); + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt); + bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt; + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt); + bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4); bnx2fc_stats->dumped_frames = 0; bnx2fc_stats->lip_count = 0; @@ -700,6 +705,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) bnx2fc_stats->loss_of_signal_count = 0; bnx2fc_stats->prim_seq_protocol_err_count = 0; + memcpy(&hba->prev_stats, hba->stats_buffer, + sizeof(struct fcoe_statistics_params)); return bnx2fc_stats; } @@ -2660,7 +2667,7 @@ static struct scsi_host_template bnx2fc_shost_template = { .can_queue = BNX2FC_CAN_QUEUE, .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = BNX2FC_MAX_BDS_PER_CMD, - .max_sectors = 512, + .max_sectors = 1024, }; static struct libfc_function_template bnx2fc_libfc_fcn_templ = { diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index ef60afa94d0e..85ea98a80f40 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port, * @port: port structure pointer * @tgt: bnx2fc_rport structure pointer */ -static int bnx2fc_send_session_enable_req(struct fcoe_port *port, +int bnx2fc_send_session_enable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt) { struct kwqe *kwqe_arr[2]; @@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET: BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n", xid); - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); if (!test_bit(BNX2FC_FLAG_SRR_SENT, @@ -847,8 +845,6 @@ ret_err_rqe: goto ret_warn_rqe; } - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); @@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, struct bnx2fc_interface *interface; u32 conn_id; u32 context_id; - int rc; conn_id = ofld_kcqe->fcoe_conn_id; context_id = ofld_kcqe->fcoe_conn_context_id; @@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, "resources\n"); set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags); } - goto ofld_cmpl_err; } else { - - /* now enable the session */ - rc = bnx2fc_send_session_enable_req(port, tgt); - if (rc) { - printk(KERN_ERR PFX "enable session failed\n"); - goto ofld_cmpl_err; - } + /* FW offload request successfully completed */ + set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); } - return; ofld_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); @@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); goto enbl_cmpl_err; } - if (ofld_kcqe->completion_status) - goto enbl_cmpl_err; - else { + if (!ofld_kcqe->completion_status) /* enable successful - rport ready for issuing IOs */ - set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); - set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); - wake_up_interruptible(&tgt->ofld_wait); - } - return; + set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); enbl_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); @@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba, /* disable successful */ BNX2FC_TGT_DBG(tgt, "disable successful\n"); clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8d4626c07a12..60798e829de6 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz, &mp_req->mp_resp_bd_dma, GFP_ATOMIC); - if (!mp_req->mp_req_bd) { + if (!mp_req->mp_resp_bd) { printk(KERN_ERR PFX "unable to alloc MP resp bd\n"); bnx2fc_free_mp_resc(io_req); return FAILED; @@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) { struct fc_lport *lport; - struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); - struct fc_rport_libfc_priv *rp = rport->dd_data; + struct fc_rport *rport; + struct fc_rport_libfc_priv *rp; struct fcoe_port *port; struct bnx2fc_interface *interface; struct bnx2fc_rport *tgt; @@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) unsigned long start = jiffies; lport = shost_priv(host); + rport = starget_to_rport(scsi_target(sc_cmd->device)); port = lport_priv(lport); interface = port->priv; @@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) rc = FAILED; goto tmf_err; } + rp = rport->dd_data; rc = fc_block_scsi_eh(sc_cmd); if (rc) diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index b9d0d9cb17f9..c57a3bb8a9fb 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data) BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n"); /* fake upload completion */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); } @@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data) * resources are freed up in bnx2fc_offload_session */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); } +static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); + mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); + + wait_event_interruptible(tgt->ofld_wait, + (test_bit( + BNX2FC_FLAG_OFLD_REQ_CMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->ofld_timer); +} + static void bnx2fc_offload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt, struct fc_rport_priv *rdata) @@ -103,17 +119,7 @@ retry_ofld: * wait for the session is offloaded and enabled. 3 Secs * should be ample time for this process to complete. */ - setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); - mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->ofld_wait, - (test_bit( - BNX2FC_FLAG_OFLD_REQ_CMPL, - &tgt->flags))); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->ofld_timer); + bnx2fc_ofld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, @@ -131,14 +137,23 @@ retry_ofld: } if (bnx2fc_map_doorbell(tgt)) { printk(KERN_ERR PFX "map doorbell failed - no mem\n"); - /* upload will take care of cleaning up sess resc */ - lport->tt.rport_logoff(rdata); + goto ofld_err; } + clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); + rval = bnx2fc_send_session_enable_req(port, tgt); + if (rval) { + pr_err(PFX "enable session failed\n"); + goto ofld_err; + } + bnx2fc_ofld_wait(tgt); + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) + goto ofld_err; return; ofld_err: /* couldn't offload the session. log off from this rport */ BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n"); + clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); /* Free session resources */ bnx2fc_free_session_resc(hba, tgt); tgt_init_err: @@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt) spin_unlock_bh(&tgt->tgt_lock); } +static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); + mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); + wait_event_interruptible(tgt->upld_wait, + (test_bit( + BNX2FC_FLAG_UPLD_REQ_COMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->upld_timer); +} + static void bnx2fc_upload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt) { @@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port, * wait for upload to complete. 3 Secs * should be sufficient time for this process to complete. */ - setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n"); - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); - - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); + bnx2fc_upld_wait(tgt); /* * traverse thru the active_q and tmf_q and cleanup @@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port, bnx2fc_send_session_destroy_req(hba, tgt); /* wait for destroy to complete */ - setup_timer(&tgt->upld_timer, - bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); + bnx2fc_upld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags))) printk(KERN_ERR PFX "ERROR!! destroy timed out\n"); BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n", tgt->flags); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); } else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) { printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy" @@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt, tgt->rq_cons_idx = 0; atomic_set(&tgt->num_active_ios, 0); - if (rdata->flags & FC_RP_FLAGS_RETRY) { + if (rdata->flags & FC_RP_FLAGS_RETRY && + rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET && + !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) { tgt->dev_type = TYPE_TAPE; tgt->io_timeout = 0; /* use default ULP timeout */ } else { @@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, tgt = (struct bnx2fc_rport *)&rp[1]; /* This can happen when ADISC finds the same target */ - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { BNX2FC_TGT_DBG(tgt, "already offloaded\n"); mutex_unlock(&hba->hba_mutex); return; @@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n", hba->num_ofld_sess); - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { - /* - * Session is offloaded and enabled. Map - * doorbell register for this target - */ + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { + /* Session is offloaded and enabled. */ BNX2FC_TGT_DBG(tgt, "sess offloaded\n"); /* This counter is protected with hba mutex */ hba->num_ofld_sess++; @@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, */ tgt = (struct bnx2fc_rport *)&rp[1]; - if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) { mutex_unlock(&hba->hba_mutex); break; } diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 91eec60252ee..a28b03e5a5f6 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) (1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN)); if (error_mask1) { iscsi_init2.error_bit_map[0] = error_mask1; - mask64 &= (u32)(~mask64); + mask64 ^= (u32)(mask64); mask64 |= error_mask1; } else iscsi_init2.error_bit_map[0] = (u32) mask64; diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index a15474eef5f7..2a323742ce04 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -895,7 +895,7 @@ static int ch_probe(struct device *dev) { struct scsi_device *sd = to_scsi_device(dev); struct device *class_dev; - int minor, ret = -ENOMEM; + int ret; scsi_changer *ch; if (sd->type != TYPE_MEDIUM_CHANGER) @@ -905,22 +905,19 @@ static int ch_probe(struct device *dev) if (NULL == ch) return -ENOMEM; - if (!idr_pre_get(&ch_index_idr, GFP_KERNEL)) - goto free_ch; - + idr_preload(GFP_KERNEL); spin_lock(&ch_index_lock); - ret = idr_get_new(&ch_index_idr, ch, &minor); + ret = idr_alloc(&ch_index_idr, ch, 0, CH_MAX_DEVS + 1, GFP_NOWAIT); spin_unlock(&ch_index_lock); + idr_preload_end(); - if (ret) + if (ret < 0) { + if (ret == -ENOSPC) + ret = -ENODEV; goto free_ch; - - if (minor > CH_MAX_DEVS) { - ret = -ENODEV; - goto remove_idr; } - ch->minor = minor; + ch->minor = ret; sprintf(ch->name,"ch%d",ch->minor); class_dev = device_create(ch_sysfs_class, dev, @@ -944,7 +941,7 @@ static int ch_probe(struct device *dev) return 0; remove_idr: - idr_remove(&ch_index_idr, minor); + idr_remove(&ch_index_idr, ch->minor); free_ch: kfree(ch); return ret; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 8ecdb94a59f4..bdd78fb4fc70 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) value_to_add = 4 - (cf->size % 4); cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL); - if (cfg_data == NULL) - return -ENOMEM; + if (cfg_data == NULL) { + ret = -ENOMEM; + goto leave; + } memcpy((void *)cfg_data, (const void *)cf->data, cf->size); - - if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) - return -EINVAL; + if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) { + ret = -EINVAL; + goto leave; + } mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param); maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16; @@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64); } +leave: kfree(cfg_data); release_firmware(cf); - return ret; } diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index c323b2030afa..0604b5ff3638 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport; /* * debugfs support */ -static int -csio_mem_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static const struct file_operations csio_mem_debugfs_fops = { .owner = THIS_MODULE, - .open = csio_mem_open, + .open = simple_open, .read = csio_mem_read, .llseek = default_llseek, }; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index f924b3c3720e..3fecf35ba292 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state) break; case CXGB4_STATE_DETACH: pr_info("cdev 0x%p, DETACH.\n", cdev); + cxgbi_device_unregister(cdev); break; default: pr_info("cdev 0x%p, unknown state %d.\n", cdev, state); diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 3c53c3478ee7..483eb9dbe663 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb) } fnic_queue_wq_eth_desc(wq, skb, pa, skb->len, - fnic->vlan_hw_insert, fnic->vlan_id, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1); spin_unlock_irqrestore(&fnic->wq_lock[0], flags); } @@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) } fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), - fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1, 1, 1); fnic_send_frame_end: spin_unlock_irqrestore(&fnic->wq_lock[0], flags); diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 599790e41a98..59bceac51a4c 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr, pci_read_config_word(pdev, PCI_COMMAND, &command); command |= 6; pci_write_config_word(pdev, PCI_COMMAND, command); - if (pci_resource_start(pdev, 8) == 1UL) - pci_resource_start(pdev, 8) = 0UL; - i = 0xFEFF0001UL; - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i); - gdth_delay(1); - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, - pci_resource_start(pdev, 8)); - + gdth_delay(1); + dp6m_ptr = ha->brd; /* Ensure that it is safe to access the non HW portions of DPMEM. diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1d7da3f41ebb..8fa79b83f2d3 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0; static unsigned int ipr_debug = 0; static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS; static unsigned int ipr_dual_ioa_raid = 1; +static unsigned int ipr_number_of_msix = 2; static DEFINE_SPINLOCK(ipr_driver_lock); /* This table describes the differences between DMA controller chips */ @@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x0022C, .clr_interrupt_mask_reg = 0x00230, @@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x00288, .clr_interrupt_mask_reg = 0x0028C, @@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 1000, .cache_line_size = 0x20, .clear_isr = 0, + .iopoll_weight = 64, { .set_interrupt_mask_reg = 0x00010, .clr_interrupt_mask_reg = 0x00018, @@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e module_param_named(max_devs, ipr_max_devs, int, 0); MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. " "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]"); +module_param_named(number_of_msix, ipr_number_of_msix, int, 0); +MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5). (default:2)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IPR_DRIVER_VERSION); @@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++]; + trace_entry = &ioa_cfg->trace[atomic_add_return + (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff; trace_entry->res_handle = ipr_cmd->ioarcb.res_handle; trace_entry->u.add_data = add_data; + wmb(); } #else #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0) @@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd) struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa; struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64; dma_addr_t dma_addr = ipr_cmd->dma_addr; + int hrrq_id; + hrrq_id = ioarcb->cmd_pkt.hrrq_id; memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt)); + ioarcb->cmd_pkt.hrrq_id = hrrq_id; ioarcb->data_transfer_length = 0; ioarcb->read_data_transfer_length = 0; ioarcb->ioadl_len = 0; @@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, * pointer to ipr command struct **/ static -struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) +struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq) { - struct ipr_cmnd *ipr_cmd; + struct ipr_cmnd *ipr_cmd = NULL; + + if (likely(!list_empty(&hrrq->hrrq_free_q))) { + ipr_cmd = list_entry(hrrq->hrrq_free_q.next, + struct ipr_cmnd, queue); + list_del(&ipr_cmd->queue); + } - ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue); - list_del(&ipr_cmd->queue); return ipr_cmd; } @@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) static struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) { - struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); + struct ipr_cmnd *ipr_cmd = + __ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]); ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); return ipr_cmd; } @@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg, u32 clr_ints) { volatile u32 int_reg; + int i; /* Stop new interrupts */ - ioa_cfg->allow_interrupts = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); /* Set interrupt mask to stop all new interrupts */ if (ioa_cfg->sis64) @@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg) **/ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ata_queued_cmd *qc = ipr_cmd->qc; struct ipr_sata_port *sata_port = qc->ap->private_data; qc->err_mask |= AC_ERR_OTHER; sata_port->ioasa.status |= ATA_BUSY; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); ata_qc_complete(qc); } @@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) **/ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; scsi_cmd->result |= (DID_ERROR << 16); scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg) { struct ipr_cmnd *ipr_cmd, *temp; + struct ipr_hrr_queue *hrrq; ENTER; - list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) { - list_del(&ipr_cmd->queue); + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry_safe(ipr_cmd, + temp, &hrrq->hrrq_pending_q, queue) { + list_del(&ipr_cmd->queue); - ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); - ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID); + ipr_cmd->s.ioasa.hdr.ioasc = + cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); + ipr_cmd->s.ioasa.hdr.ilid = + cpu_to_be32(IPR_DRIVER_ILID); - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - else if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + else if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET); - del_timer(&ipr_cmd->timer); - ipr_cmd->done(ipr_cmd); + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, + IPR_IOASC_IOA_WAS_RESET); + del_timer(&ipr_cmd->timer); + ipr_cmd->done(ipr_cmd); + } + spin_unlock(&hrrq->_lock); } - LEAVE; } @@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd, void (*done) (struct ipr_cmnd *), void (*timeout_func) (struct ipr_cmnd *), u32 timeout) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = done; @@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, spin_lock_irq(ioa_cfg->host->host_lock); } +static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) +{ + if (ioa_cfg->hrrq_num == 1) + return 0; + else + return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; +} + /** * ipr_send_hcam - Send an HCAM to the adapter. * @ioa_cfg: ioa config struct @@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type, struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; - if (ioa_cfg->allow_cmds) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q); ipr_cmd->u.hostrcb = hostrcb; @@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res, } /** - * ipr_format_res_path - Format the resource path for printing. + * __ipr_format_res_path - Format the resource path for printing. * @res_path: resource path * @buf: buffer + * @len: length of buffer provided * * Return value: * pointer to buffer **/ -static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) +static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len) { int i; char *p = buffer; @@ -1187,6 +1222,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) } /** + * ipr_format_res_path - Format the resource path for printing. + * @ioa_cfg: ioa config struct + * @res_path: resource path + * @buf: buffer + * @len: length of buffer provided + * + * Return value: + * pointer to buffer + **/ +static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg, + u8 *res_path, char *buffer, int len) +{ + char *p = buffer; + + *p = '\0'; + p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no); + __ipr_format_res_path(res_path, p, len - (buffer - p)); + return buffer; +} + +/** * ipr_update_res_entry - Update the resource entry. * @res: resource entry struct * @cfgtew: config table entry wrapper struct @@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res, if (res->sdev && new_path) sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(res->ioa_cfg, + res->res_path, buffer, sizeof(buffer))); } else { res->flags = cfgtew->u.cfgte->flags; if (res->flags & IPR_IS_IOA_RESOURCE) @@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd) u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ioasc) { if (ioasc != IPR_IOASC_IOA_WAS_RESET) @@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err_separator; ipr_err("Device %d : %s", i + 1, - ipr_format_res_path(dev_entry->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(dev_entry->res_path, + buffer, sizeof(buffer))); ipr_log_ext_vpd(&dev_entry->vpd); ipr_err("-----New Device Information-----\n"); @@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n", path_active_desc[i].desc, path_state_desc[j].desc, - ipr_format_res_path(fabric->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, + fabric->res_path, + buffer, sizeof(buffer))); return; } } ipr_err("Path state=%02X Resource Path=%s\n", path_state, - ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path, + buffer, sizeof(buffer))); } static const struct { @@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n", path_status_desc[j].desc, path_type_desc[i].desc, - ipr_format_res_path(cfg->res_path, buffer, - sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), + be32_to_cpu(cfg->wwid[1])); return; } } ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s " "WWN=%08X%08X\n", cfg->type_status, - ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); } /** @@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("RAID %s Array Configuration: %s\n", error->protection_level, - ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, error->last_res_path, + buffer, sizeof(buffer))); ipr_err_separator; @@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("Array Member %d:\n", i); ipr_log_ext_vpd(&array_entry->vpd); ipr_err("Current Location: %s\n", - ipr_format_res_path(array_entry->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, array_entry->res_path, + buffer, sizeof(buffer))); ipr_err("Expected Location: %s\n", - ipr_format_res_path(array_entry->expected_res_path, - buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + array_entry->expected_res_path, + buffer, sizeof(buffer))); ipr_err_separator; } @@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ioasc) { ipr_handle_log_data(ioa_cfg, hostrcb); @@ -2491,36 +2553,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd) } /** - * ipr_reset_reload - Reset/Reload the IOA - * @ioa_cfg: ioa config struct - * @shutdown_type: shutdown type - * - * This function resets the adapter and re-initializes it. - * This function assumes that all new host commands have been stopped. - * Return value: - * SUCCESS / FAILED - **/ -static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg, - enum ipr_shutdown_type shutdown_type) -{ - if (!ioa_cfg->in_reset_reload) - ipr_initiate_ioa_reset(ioa_cfg, shutdown_type); - - spin_unlock_irq(ioa_cfg->host->host_lock); - wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); - spin_lock_irq(ioa_cfg->host->host_lock); - - /* If we got hit with a host reset while we were already resetting - the adapter for some reason, and the reset failed. */ - if (ioa_cfg->ioa_is_dead) { - ipr_trace; - return FAILED; - } - - return SUCCESS; -} - -/** * ipr_find_ses_entry - Find matching SES in SES table * @res: resource entry struct of SES * @@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work) restart: do { did_work = 0; - if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds || + !ioa_cfg->allow_ml_add_del) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; } @@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev, int len; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) len = snprintf(buf, PAGE_SIZE, "offline\n"); else len = snprintf(buf, PAGE_SIZE, "online\n"); @@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; unsigned long lock_flags; - int result = count; + int result = count, i; if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) { - ioa_cfg->ioa_is_dead = 0; + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && + !strncmp(buf, "online", 6)) { + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ioa_cfg->reset_retries = 0; ioa_cfg->in_ioa_bringdown = 0; ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); @@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; +static int ipr_iopoll(struct blk_iopoll *iop, int budget); + /** + * ipr_show_iopoll_weight - Show ipr polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_show_iopoll_weight(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long lock_flags = 0; + int len; + + spin_lock_irqsave(shost->host_lock, lock_flags); + len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight); + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return len; +} + +/** + * ipr_store_iopoll_weight - Change the adapter's polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_store_iopoll_weight(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long user_iopoll_weight; + unsigned long lock_flags = 0; + int i; + + if (!ioa_cfg->sis64) { + dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + return -EINVAL; + } + if (kstrtoul(buf, 10, &user_iopoll_weight)) + return -EINVAL; + + if (user_iopoll_weight > 256) { + dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + return -EINVAL; + } + + if (user_iopoll_weight == ioa_cfg->iopoll_weight) { + dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + return strlen(buf); + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + + spin_lock_irqsave(shost->host_lock, lock_flags); + ioa_cfg->iopoll_weight = user_iopoll_weight; + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return strlen(buf); +} + +static struct device_attribute ipr_iopoll_weight_attr = { + .attr = { + .name = "iopoll_weight", + .mode = S_IRUGO | S_IWUSR, + }, + .show = ipr_show_iopoll_weight, + .store = ipr_store_iopoll_weight +}; + /** * ipr_alloc_ucode_buffer - Allocates a microcode download buffer * @buf_len: buffer length @@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { &ipr_ioa_reset_attr, &ipr_update_fw_attr, &ipr_ioa_fw_type_attr, + &ipr_iopoll_weight_attr, NULL, }; @@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg) ioa_cfg->dump = dump; ioa_cfg->sdt_state = WAIT_FOR_DUMP; - if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) { ioa_cfg->dump_taken = 1; schedule_work(&ioa_cfg->work_q); } @@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut res = (struct ipr_resource_entry *)sdev->hostdata; if (res && ioa_cfg->sis64) len = snprintf(buf, PAGE_SIZE, "%s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(res->res_path, buffer, + sizeof(buffer))); else if (res) len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no, res->bus, res->target, res->lun); @@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); if (ioa_cfg->sis64) sdev_printk(KERN_INFO, sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + res->res_path, buffer, sizeof(buffer))); return 0; } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); @@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } -/** - * ipr_eh_host_reset - Reset the host adapter - * @scsi_cmd: scsi command struct - * - * Return value: - * SUCCESS / FAILED - **/ -static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) +static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + unsigned long lock_flags = 0; + int rc = SUCCESS; ENTER; - ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->in_reset_reload) { + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV); dev_err(&ioa_cfg->pdev->dev, "Adapter being reset as a result of error recovery.\n"); @@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) ioa_cfg->sdt_state = GET_DUMP; } - rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); - - LEAVE; - return rc; -} - -static int ipr_eh_host_reset(struct scsi_cmnd *cmd) -{ - int rc; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - spin_lock_irq(cmd->device->host->host_lock); - rc = __ipr_eh_host_reset(cmd); - spin_unlock_irq(cmd->device->host->host_lock); + /* If we got hit with a host reset while we were already resetting + the adapter for some reason, and the reset failed. */ + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { + ipr_trace; + rc = FAILED; + } + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; return rc; } @@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT); ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) { if (ipr_cmd->ioa_cfg->sis64) memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, @@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) struct ipr_resource_entry *res; struct ata_port *ap; int rc = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; @@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) */ if (ioa_cfg->in_reset_reload) return FAILED; - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; - if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { - ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; - ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == res->res_handle) { + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->qc && + !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { + ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; + ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + } } } + spin_unlock(&hrrq->_lock); } - res->resetting_device = 1; scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n"); @@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) ata_std_error_handler(ap); spin_lock_irq(scsi_cmd->device->host->host_lock); - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - rc = -EIO; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, + &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == + res->res_handle) { + rc = -EIO; + break; + } } + spin_unlock(&hrrq->_lock); } } else rc = ipr_device_reset(ioa_cfg, res); @@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd) else ipr_cmd->sibling->done(ipr_cmd->sibling); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); LEAVE; } @@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) struct ipr_cmd_pkt *cmd_pkt; u32 ioasc, int_reg; int op_found = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata; @@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) * This will force the mid-layer to call ipr_eh_host_reset, * which will then go to sleep and wait for the reset to complete */ - if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) + if (ioa_cfg->in_reset_reload || + ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; if (!res) return FAILED; @@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) if (!ipr_is_gscsi(res)) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->scsi_cmd == scsi_cmd) { - ipr_cmd->done = ipr_scsi_eh_done; - op_found = 1; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->scsi_cmd == scsi_cmd) { + ipr_cmd->done = ipr_scsi_eh_done; + op_found = 1; + break; + } } + spin_unlock(&hrrq->_lock); } if (!op_found) @@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; @@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, } else { if (int_reg & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + else if (int_reg & IPR_PCII_NO_HOST_RRQ) + dev_err(&ioa_cfg->pdev->dev, + "No Host RRQ. 0x%08X\n", int_reg); else dev_err(&ioa_cfg->pdev->dev, "Permanent IOA failure. 0x%08X\n", int_reg); @@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, * Return value: * none **/ -static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) +static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number) { ioa_cfg->errors_logged++; - dev_err(&ioa_cfg->pdev->dev, "%s\n", msg); + dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number); if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) ioa_cfg->sdt_state = GET_DUMP; @@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); } +static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, + struct list_head *doneq) +{ + u32 ioasc; + u16 cmd_index; + struct ipr_cmnd *ipr_cmd; + struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg; + int num_hrrq = 0; + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrr_queue->allow_interrupts) + return 0; + + while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrr_queue->toggle_bit) { + + cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) & + IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> + IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; + + if (unlikely(cmd_index > hrr_queue->max_cmd_id || + cmd_index < hrr_queue->min_cmd_id)) { + ipr_isr_eh(ioa_cfg, + "Invalid response handle from IOA: ", + cmd_index); + break; + } + + ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; + ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); + + list_move_tail(&ipr_cmd->queue, doneq); + + if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) { + hrr_queue->hrrq_curr++; + } else { + hrr_queue->hrrq_curr = hrr_queue->hrrq_start; + hrr_queue->toggle_bit ^= 1u; + } + num_hrrq++; + if (budget > 0 && num_hrrq >= budget) + break; + } + + return num_hrrq; +} + +static int ipr_iopoll(struct blk_iopoll *iop, int budget) +{ + struct ipr_ioa_cfg *ioa_cfg; + struct ipr_hrr_queue *hrrq; + struct ipr_cmnd *ipr_cmd, *temp; + unsigned long hrrq_flags; + int completed_ops; + LIST_HEAD(doneq); + + hrrq = container_of(iop, struct ipr_hrr_queue, iopoll); + ioa_cfg = hrrq->ioa_cfg; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); + + if (completed_ops < budget) + blk_iopoll_complete(iop); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } + + return completed_ops; +} + /** * ipr_isr - Interrupt service routine * @irq: irq number @@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) **/ static irqreturn_t ipr_isr(int irq, void *devp) { - struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp; - unsigned long lock_flags = 0; + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; u32 int_reg = 0; - u32 ioasc; - u16 cmd_index; int num_hrrq = 0; int irq_none = 0; struct ipr_cmnd *ipr_cmd, *temp; irqreturn_t rc = IRQ_NONE; LIST_HEAD(doneq); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* If interrupts are disabled, ignore the interrupt */ - if (!ioa_cfg->allow_interrupts) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_NONE; } while (1) { - ipr_cmd = NULL; - - while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == - ioa_cfg->toggle_bit) { - - cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) & - IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; - - if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) { - ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA"); - rc = IRQ_HANDLED; - goto unlock_out; - } - - ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; - - ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + if (ipr_process_hrrq(hrrq, -1, &doneq)) { + rc = IRQ_HANDLED; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); - - list_move_tail(&ipr_cmd->queue, &doneq); - - rc = IRQ_HANDLED; - - if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) { - ioa_cfg->hrrq_curr++; - } else { - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit ^= 1u; - } - } - - if (ipr_cmd && !ioa_cfg->clear_isr) - break; + if (!ioa_cfg->clear_isr) + break; - if (ipr_cmd != NULL) { /* Clear the PCI interrupt */ num_hrrq = 0; do { - writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32); + writel(IPR_PCII_HRRQ_UPDATED, + ioa_cfg->regs.clr_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); } while (int_reg & IPR_PCII_HRRQ_UPDATED && - num_hrrq++ < IPR_MAX_HRRQ_RETRIES); + num_hrrq++ < IPR_MAX_HRRQ_RETRIES); } else if (rc == IRQ_NONE && irq_none == 0) { int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); irq_none++; } else if (num_hrrq == IPR_MAX_HRRQ_RETRIES && int_reg & IPR_PCII_HRRQ_UPDATED) { - ipr_isr_eh(ioa_cfg, "Error clearing HRRQ"); + ipr_isr_eh(ioa_cfg, + "Error clearing HRRQ: ", num_hrrq); rc = IRQ_HANDLED; - goto unlock_out; + break; } else break; } @@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp) if (unlikely(rc == IRQ_NONE)) rc = ipr_handle_other_interrupt(ioa_cfg, int_reg); -unlock_out: - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); del_timer(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } + return rc; +} + +/** + * ipr_isr_mhrrq - Interrupt service routine + * @irq: irq number + * @devp: pointer to ioa config struct + * + * Return value: + * IRQ_NONE / IRQ_HANDLED + **/ +static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) +{ + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; + struct ipr_cmnd *ipr_cmd, *temp; + irqreturn_t rc = IRQ_NONE; + LIST_HEAD(doneq); + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_NONE; + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) { + if (!blk_iopoll_sched_prep(&hrrq->iopoll)) + blk_iopoll_sched(&hrrq->iopoll); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_HANDLED; + } + } else { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) + + if (ipr_process_hrrq(hrrq, -1, &doneq)) + rc = IRQ_HANDLED; + } + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } return rc; } @@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) { struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; struct ipr_resource_entry *res = scsi_cmd->device->hostdata; - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); if (IPR_IOASC_SENSE_KEY(ioasc) > 0) { @@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) res->in_erp = 0; } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long lock_flags; + unsigned long hrrq_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } else { - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } } @@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost, struct ipr_resource_entry *res; struct ipr_ioarcb *ioarcb; struct ipr_cmnd *ipr_cmd; - unsigned long lock_flags; + unsigned long hrrq_flags, lock_flags; int rc; + struct ipr_hrr_queue *hrrq; + int hrrq_id; ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; - spin_lock_irqsave(shost->host_lock, lock_flags); scsi_cmd->result = (DID_OK << 16); res = scsi_cmd->device->hostdata; + if (ipr_is_gata(res) && res->sata_port) { + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return rc; + } + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* * We are currently blocking all devices due to a host reset * We have told the host to stop giving us new requests, but * ERP ops don't count. FIXME */ - if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return SCSI_MLQUEUE_HOST_BUSY; } @@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * FIXME - Create scsi_set_host_offline interface * and the ioa_is_dead check can be removed */ - if (unlikely(ioa_cfg->ioa_is_dead || !res)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead || !res)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); goto err_nodev; } - if (ipr_is_gata(res) && res->sata_port) { - rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); - spin_unlock_irqrestore(shost->host_lock, lock_flags); - return rc; + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return SCSI_MLQUEUE_HOST_BUSY; } - - ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done); ioarcb = &ipr_cmd->ioarcb; @@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost, } if (scsi_cmd->cmnd[0] >= 0xC0 && - (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) + (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + } if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); else rc = ipr_build_ioadl(ioa_cfg, ipr_cmd); - spin_lock_irqsave(shost->host_lock, lock_flags); - if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); + if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); if (!rc) scsi_dma_unmap(scsi_cmd); return SCSI_MLQUEUE_HOST_BUSY; } - if (unlikely(ioa_cfg->ioa_is_dead)) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); scsi_dma_unmap(scsi_cmd); goto err_nodev; } @@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost, ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE; res->needs_sync_complete = 0; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q); ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res)); ipr_send_command(ipr_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; err_nodev: - spin_lock_irqsave(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); scsi_cmd->result = (DID_NO_CONNECT << 16); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; } @@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) goto out_unlock; rc = ipr_device_reset(ioa_cfg, res); @@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) struct ipr_sata_port *sata_port = qc->ap->private_data; struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; unsigned long flags; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->qc == qc) { - ipr_device_reset(ioa_cfg, sata_port->res); - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->qc == qc) { + ipr_device_reset(ioa_cfg, sata_port->res); + break; + } } + spin_unlock(&hrrq->_lock); } spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) struct ipr_resource_entry *res = sata_port->res; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + spin_lock(&ipr_cmd->hrrq->_lock); if (ipr_cmd->ioa_cfg->sis64) memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, sizeof(struct ipr_ioasa_gata)); @@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) qc->err_mask |= __ac_err_mask(sata_port->ioasa.status); else qc->err_mask |= ac_err_mask(sata_port->ioasa.status); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); ata_qc_complete(qc); } @@ -6244,6 +6502,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd, } /** + * ipr_qc_defer - Get a free ipr_cmd + * @qc: queued command + * + * Return value: + * 0 if success + **/ +static int ipr_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ipr_sata_port *sata_port = ap->private_data; + struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; + struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; + int hrrq_id; + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + qc->lldd_task = NULL; + spin_lock(&hrrq->_lock); + if (unlikely(hrrq->ioa_is_dead)) { + spin_unlock(&hrrq->_lock); + return 0; + } + + if (unlikely(!hrrq->allow_cmds)) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + qc->lldd_task = ipr_cmd; + spin_unlock(&hrrq->_lock); + return 0; +} + +/** * ipr_qc_issue - Issue a SATA qc to a device * @qc: queued command * @@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) struct ipr_ioarcb *ioarcb; struct ipr_ioarcb_ata_regs *regs; - if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead)) + if (qc->lldd_task == NULL) + ipr_qc_defer(qc); + + ipr_cmd = qc->lldd_task; + if (ipr_cmd == NULL) return AC_ERR_SYSTEM; - ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); + qc->lldd_task = NULL; + spin_lock(&ipr_cmd->hrrq->_lock); + if (unlikely(!ipr_cmd->hrrq->allow_cmds || + ipr_cmd->hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); + return AC_ERR_SYSTEM; + } + + ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); ioarcb = &ipr_cmd->ioarcb; if (ioa_cfg->sis64) { @@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) memset(regs, 0, sizeof(*regs)); ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs)); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->qc = qc; ipr_cmd->done = ipr_sata_done; ipr_cmd->ioarcb.res_handle = res->res_handle; @@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) default: WARN_ON(1); + spin_unlock(&ipr_cmd->hrrq->_lock); return AC_ERR_INVALID; } ipr_send_command(ipr_cmd); + spin_unlock(&ipr_cmd->hrrq->_lock); return 0; } @@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = { .hardreset = ipr_sata_reset, .post_internal_cmd = ipr_ata_post_internal, .qc_prep = ata_noop_qc_prep, + .qc_defer = ipr_qc_defer, .qc_issue = ipr_qc_issue, .qc_fill_rtf = ipr_qc_fill_rtf, .port_start = ata_sas_port_start, @@ -6427,7 +6743,7 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) ENTER; ioa_cfg->in_reset_reload = 0; ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock_irq(ioa_cfg->host->host_lock); @@ -6454,11 +6770,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_resource_entry *res; struct ipr_hostrcb *hostrcb, *temp; - int i = 0; + int i = 0, j; ENTER; ioa_cfg->in_reset_reload = 0; - ioa_cfg->allow_cmds = 1; + for (j = 0; j < ioa_cfg->hrrq_num; j++) { + spin_lock(&ioa_cfg->hrrq[j]._lock); + ioa_cfg->hrrq[j].allow_cmds = 1; + spin_unlock(&ioa_cfg->hrrq[j]._lock); + } + wmb(); ioa_cfg->reset_cmd = NULL; ioa_cfg->doorbell |= IPR_RUNTIME_RESET; @@ -6482,14 +6803,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n"); ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock(ioa_cfg->host->host_lock); scsi_unblock_requests(ioa_cfg->host); spin_lock(ioa_cfg->host->host_lock); - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) scsi_block_requests(ioa_cfg->host); LEAVE; @@ -6560,9 +6881,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd) if (!ioa_cfg->sis64) ipr_cmd->job_step = ipr_set_supported_devs; + LEAVE; return IPR_RC_JOB_RETURN; } + LEAVE; return IPR_RC_JOB_CONTINUE; } @@ -6820,7 +7143,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd) ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); return IPR_RC_JOB_RETURN; } @@ -7278,46 +7601,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd) { struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; + struct ipr_hrr_queue *hrrq; ENTER; + ipr_cmd->job_step = ipr_ioafp_std_inquiry; dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n"); - ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; - ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); + if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) { + hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index]; - ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; - if (ioa_cfg->sis64) - ioarcb->cmd_pkt.cdb[1] = 0x1; - ioarcb->cmd_pkt.cdb[2] = - ((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff; - ioarcb->cmd_pkt.cdb[3] = - ((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff; - ioarcb->cmd_pkt.cdb[4] = - ((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[5] = - ((u64) ioa_cfg->host_rrq_dma) & 0xff; - ioarcb->cmd_pkt.cdb[7] = - ((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[8] = - (sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff; + ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; + ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); - if (ioa_cfg->sis64) { - ioarcb->cmd_pkt.cdb[10] = - ((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff; - ioarcb->cmd_pkt.cdb[11] = - ((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff; - ioarcb->cmd_pkt.cdb[12] = - ((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff; - ioarcb->cmd_pkt.cdb[13] = - ((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff; - } + ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + if (ioa_cfg->sis64) + ioarcb->cmd_pkt.cdb[1] = 0x1; - ipr_cmd->job_step = ipr_ioafp_std_inquiry; + if (ioa_cfg->nvectors == 1) + ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE; + else + ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE; + + ioarcb->cmd_pkt.cdb[2] = + ((u64) hrrq->host_rrq_dma >> 24) & 0xff; + ioarcb->cmd_pkt.cdb[3] = + ((u64) hrrq->host_rrq_dma >> 16) & 0xff; + ioarcb->cmd_pkt.cdb[4] = + ((u64) hrrq->host_rrq_dma >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[5] = + ((u64) hrrq->host_rrq_dma) & 0xff; + ioarcb->cmd_pkt.cdb[7] = + ((sizeof(u32) * hrrq->size) >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[8] = + (sizeof(u32) * hrrq->size) & 0xff; + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[9] = + ioa_cfg->identify_hrrq_index; - ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT); + if (ioa_cfg->sis64) { + ioarcb->cmd_pkt.cdb[10] = + ((u64) hrrq->host_rrq_dma >> 56) & 0xff; + ioarcb->cmd_pkt.cdb[11] = + ((u64) hrrq->host_rrq_dma >> 48) & 0xff; + ioarcb->cmd_pkt.cdb[12] = + ((u64) hrrq->host_rrq_dma >> 40) & 0xff; + ioarcb->cmd_pkt.cdb[13] = + ((u64) hrrq->host_rrq_dma >> 32) & 0xff; + } + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[14] = + ioa_cfg->identify_hrrq_index; + + ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, + IPR_INTERNAL_TIMEOUT); + + if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) + ipr_cmd->job_step = ipr_ioafp_identify_hrrq; + + LEAVE; + return IPR_RC_JOB_RETURN; + } LEAVE; - return IPR_RC_JOB_RETURN; + return IPR_RC_JOB_CONTINUE; } /** @@ -7365,7 +7713,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd) static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, unsigned long timeout) { - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + + ENTER; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; ipr_cmd->timer.data = (unsigned long) ipr_cmd; @@ -7383,13 +7733,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, **/ static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg) { - memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS); + struct ipr_hrr_queue *hrrq; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size); + + /* Initialize Host RRQ pointers */ + hrrq->hrrq_start = hrrq->host_rrq; + hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1]; + hrrq->hrrq_curr = hrrq->hrrq_start; + hrrq->toggle_bit = 1; + spin_unlock(&hrrq->_lock); + } + wmb(); - /* Initialize Host RRQ pointers */ - ioa_cfg->hrrq_start = ioa_cfg->host_rrq; - ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1]; - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit = 1; + ioa_cfg->identify_hrrq_index = 0; + if (ioa_cfg->hrrq_num == 1) + atomic_set(&ioa_cfg->hrrq_index, 0); + else + atomic_set(&ioa_cfg->hrrq_index, 1); /* Zero out config table */ memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size); @@ -7446,7 +7809,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); return IPR_RC_JOB_RETURN; } @@ -7466,12 +7830,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; volatile u32 int_reg; volatile u64 maskval; + int i; ENTER; ipr_cmd->job_step = ipr_ioafp_identify_hrrq; ipr_init_ioa_mem(ioa_cfg); - ioa_cfg->allow_interrupts = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->sis64) { /* Set the adapter to the correct endian mode. */ writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg); @@ -7511,7 +7881,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -8030,7 +8400,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd) int rc = IPR_RC_JOB_CONTINUE; ENTER; - if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) { + if (shutdown_type != IPR_SHUTDOWN_NONE && + !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN; @@ -8078,7 +8449,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd) * We are doing nested adapter resets and this is * not the current reset job. */ - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, + &ipr_cmd->hrrq->hrrq_free_q); return; } @@ -8113,9 +8485,15 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { struct ipr_cmnd *ipr_cmd; + int i; ioa_cfg->in_reset_reload = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); scsi_block_requests(ioa_cfg->host); ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); @@ -8141,7 +8519,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { - if (ioa_cfg->ioa_is_dead) + int i; + + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return; if (ioa_cfg->in_reset_reload) { @@ -8156,7 +8536,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, "IOA taken offline - error recovery failed\n"); ioa_cfg->reset_retries = 0; - ioa_cfg->ioa_is_dead = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->in_ioa_bringdown) { ioa_cfg->reset_cmd = NULL; @@ -8188,9 +8573,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, */ static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd) { + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + int i; + /* Disallow new interrupts, avoid loop */ - ipr_cmd->ioa_cfg->allow_interrupts = 0; - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; return IPR_RC_JOB_RETURN; } @@ -8247,13 +8640,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev) { unsigned long flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); if (ioa_cfg->sdt_state == WAIT_FOR_DUMP) ioa_cfg->sdt_state = ABORT_DUMP; ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES; ioa_cfg->in_ioa_bringdown = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -8310,12 +8709,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg) } else _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa, IPR_SHUTDOWN_NONE); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); - if (ioa_cfg->ioa_is_dead) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { rc = -EIO; } else if (ipr_invalid_adapter(ioa_cfg)) { if (!ipr_testmode) @@ -8376,8 +8774,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg) pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs), ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); ipr_free_cmd_blks(ioa_cfg); - pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + + for (i = 0; i < ioa_cfg->hrrq_num; i++) + pci_free_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); @@ -8408,8 +8811,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) struct pci_dev *pdev = ioa_cfg->pdev; ENTER; - free_irq(pdev->irq, ioa_cfg); - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { + int i; + for (i = 0; i < ioa_cfg->nvectors; i++) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + } else + free_irq(pdev->irq, &ioa_cfg->hrrq[0]); + + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + pci_disable_msi(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + pci_disable_msix(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + } + iounmap(ioa_cfg->hdw_dma_regs); pci_release_regions(pdev); ipr_free_mem(ioa_cfg); @@ -8430,7 +8848,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; dma_addr_t dma_addr; - int i; + int i, entries_each_hrrq, hrrq_id = 0; ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev, sizeof(struct ipr_cmnd), 512, 0); @@ -8446,6 +8864,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + if (ioa_cfg->hrrq_num > 1) { + if (i == 0) { + entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = + (entries_each_hrrq - 1); + } else { + entries_each_hrrq = + IPR_NUM_BASE_CMD_BLKS/ + (ioa_cfg->hrrq_num - 1); + ioa_cfg->hrrq[i].min_cmd_id = + IPR_NUM_INTERNAL_CMD_BLKS + + (i - 1) * entries_each_hrrq; + ioa_cfg->hrrq[i].max_cmd_id = + (IPR_NUM_INTERNAL_CMD_BLKS + + i * entries_each_hrrq - 1); + } + } else { + entries_each_hrrq = IPR_NUM_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1); + } + ioa_cfg->hrrq[i].size = entries_each_hrrq; + } + + BUG_ON(ioa_cfg->hrrq_num == 0); + + i = IPR_NUM_CMD_BLKS - + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1; + if (i > 0) { + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i; + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i; + } + for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); @@ -8484,7 +8937,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) ipr_cmd->sense_buffer_dma = dma_addr + offsetof(struct ipr_cmnd, sense_buffer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id; + ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id]; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id) + hrrq_id++; } return 0; @@ -8516,6 +8973,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) * BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); + + if (!ioa_cfg->target_ids || !ioa_cfg->array_ids + || !ioa_cfg->vset_ids) + goto out_free_res_entries; } for (i = 0; i < ioa_cfg->max_devs_supported; i++) { @@ -8530,15 +8991,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) if (!ioa_cfg->vpd_cbs) goto out_free_res_entries; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q); + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q); + spin_lock_init(&ioa_cfg->hrrq[i]._lock); + if (i == 0) + ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock; + else + ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock; + } + if (ipr_alloc_cmd_blks(ioa_cfg)) goto out_free_vpd_cbs; - ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev, - sizeof(u32) * IPR_NUM_CMD_BLKS, - &ioa_cfg->host_rrq_dma); - - if (!ioa_cfg->host_rrq) - goto out_ipr_free_cmd_blocks; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + &ioa_cfg->hrrq[i].host_rrq_dma); + + if (!ioa_cfg->hrrq[i].host_rrq) { + while (--i > 0) + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + goto out_ipr_free_cmd_blocks; + } + ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg; + } ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, @@ -8582,8 +9062,12 @@ out_free_hostrcb_dma: ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); out_free_host_rrq: - pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + } out_ipr_free_cmd_blocks: ipr_free_cmd_blks(ioa_cfg); out_free_vpd_cbs: @@ -8591,6 +9075,9 @@ out_free_vpd_cbs: ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); out_free_res_entries: kfree(ioa_cfg->res_entries); + kfree(ioa_cfg->target_ids); + kfree(ioa_cfg->array_ids); + kfree(ioa_cfg->vset_ids); goto out; } @@ -8638,15 +9125,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->doorbell = IPR_DOORBELL; sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER); sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL); - sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL); - sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL); sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START); sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL); sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL); sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL); - INIT_LIST_HEAD(&ioa_cfg->free_q); - INIT_LIST_HEAD(&ioa_cfg->pending_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q); INIT_LIST_HEAD(&ioa_cfg->free_res_q); @@ -8724,6 +9207,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id) return NULL; } +static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg) +{ + struct msix_entry entries[IPR_MAX_MSIX_VECTORS]; + int i, err, vectors; + + for (i = 0; i < ARRAY_SIZE(entries); ++i) + entries[i].entry = i; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msix(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = entries[i].vector; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, err, vectors; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msi(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg) +{ + int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1; + + for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) { + snprintf(ioa_cfg->vectors_info[vec_idx].desc, n, + "host%d-%d", ioa_cfg->host->host_no, vec_idx); + ioa_cfg->vectors_info[vec_idx]. + desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0; + } +} + +static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, rc; + + for (i = 1; i < ioa_cfg->nvectors; i++) { + rc = request_irq(ioa_cfg->vectors_info[i].vec, + ipr_isr_mhrrq, + 0, + ioa_cfg->vectors_info[i].desc, + &ioa_cfg->hrrq[i]); + if (rc) { + while (--i >= 0) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + return rc; + } + } + return 0; +} + /** * ipr_test_intr - Handle the interrupt generated in ipr_test_msi(). * @pdev: PCI device struct @@ -8740,6 +9305,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp) unsigned long lock_flags = 0; irqreturn_t rc = IRQ_HANDLED; + dev_info(&ioa_cfg->pdev->dev, "Received IRQ : %d\n", irq); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ioa_cfg->msi_received = 1; @@ -8787,9 +9353,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->msi_received) { /* MSI test failed */ dev_info(&pdev->dev, "MSI test failed. Falling back to LSI.\n"); @@ -8806,8 +9372,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) return rc; } -/** - * ipr_probe_ioa - Allocates memory and does first stage of initialization + /* ipr_probe_ioa - Allocates memory and does first stage of initialization * @pdev: PCI device struct * @dev_id: PCI device id struct * @@ -8823,6 +9388,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, void __iomem *ipr_regs; int rc = PCIBIOS_SUCCESSFUL; volatile u32 mask, uproc, interrupts; + unsigned long lock_flags; ENTER; @@ -8918,17 +9484,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev, goto cleanup_nomem; } - /* Enable MSI style interrupts if they are supported. */ - if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) { + if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) { + dev_err(&pdev->dev, "The max number of MSIX is %d\n", + IPR_MAX_MSIX_VECTORS); + ipr_number_of_msix = IPR_MAX_MSIX_VECTORS; + } + + if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msix(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSIX; + else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msi(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSI; + else { + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + dev_info(&pdev->dev, "Cannot enable MSI.\n"); + } + + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { rc = ipr_test_msi(ioa_cfg, pdev); - if (rc == -EOPNOTSUPP) - pci_disable_msi(pdev); + if (rc == -EOPNOTSUPP) { + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + pci_disable_msi(pdev); + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + pci_disable_msix(pdev); + } + + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + } else if (rc) goto out_msi_disable; - else - dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq); - } else if (ipr_debug) - dev_info(&pdev->dev, "Cannot enable MSI.\n"); + else { + if (ioa_cfg->intr_flag == IPR_USE_MSI) + dev_info(&pdev->dev, + "Request for %d MSIs succeeded with starting IRQ: %d\n", + ioa_cfg->nvectors, pdev->irq); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + dev_info(&pdev->dev, + "Request for %d MSIXs succeeded.", + ioa_cfg->nvectors); + } + } + + ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors, + (unsigned int)num_online_cpus(), + (unsigned int)IPR_MAX_HRRQ_NUM); /* Save away PCI config space for use following IOA reset */ rc = pci_save_state(pdev); @@ -8975,11 +9580,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (interrupts & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - rc = request_irq(pdev->irq, ipr_isr, - ioa_cfg->msi_received ? 0 : IRQF_SHARED, - IPR_NAME, ioa_cfg); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->intr_flag == IPR_USE_MSI + || ioa_cfg->intr_flag == IPR_USE_MSIX) { + name_msi_vectors(ioa_cfg); + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr, + 0, + ioa_cfg->vectors_info[0].desc, + &ioa_cfg->hrrq[0]); + if (!rc) + rc = ipr_request_other_msi_irqs(ioa_cfg); + } else { + rc = request_irq(pdev->irq, ipr_isr, + IRQF_SHARED, + IPR_NAME, &ioa_cfg->hrrq[0]); + } if (rc) { dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n", pdev->irq, rc); @@ -9004,7 +9622,10 @@ out: cleanup_nolog: ipr_free_mem(ioa_cfg); out_msi_disable: - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI) + pci_disable_msi(pdev); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + pci_disable_msix(pdev); cleanup_nomem: iounmap(ipr_regs); out_release_regions: @@ -9138,7 +9759,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -9185,6 +9806,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN); ioa_cfg->allow_ml_add_del = 1; ioa_cfg->host->max_channel = IPR_VSET_BUS; + ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + schedule_work(&ioa_cfg->work_q); return 0; } @@ -9203,8 +9835,16 @@ static void ipr_shutdown(struct pci_dev *pdev) { struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); unsigned long lock_flags = 0; + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + ioa_cfg->iopoll_weight = 0; + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); @@ -9277,6 +9917,8 @@ static struct pci_device_id ipr_pci_table[] = { { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C4, 0, 0, 0 }, @@ -9290,6 +9932,14 @@ static struct pci_device_id ipr_pci_table[] = { PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 }, { } }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); @@ -9316,9 +9966,7 @@ static struct pci_driver ipr_driver = { **/ static void ipr_halt_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -9340,7 +9988,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf) list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) { spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - if (!ioa_cfg->allow_cmds) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); continue; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index c8a137f83bb1..1a9a246932ae 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,14 +32,15 @@ #include <linux/libata.h> #include <linux/list.h> #include <linux/kref.h> +#include <linux/blk-iopoll.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> /* * Literals */ -#define IPR_DRIVER_VERSION "2.5.4" -#define IPR_DRIVER_DATE "(July 11, 2012)" +#define IPR_DRIVER_VERSION "2.6.0" +#define IPR_DRIVER_DATE "(November 16, 2012)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding @@ -82,6 +83,7 @@ #define IPR_SUBS_DEV_ID_57B4 0x033B #define IPR_SUBS_DEV_ID_57B2 0x035F +#define IPR_SUBS_DEV_ID_57C0 0x0352 #define IPR_SUBS_DEV_ID_57C3 0x0353 #define IPR_SUBS_DEV_ID_57C4 0x0354 #define IPR_SUBS_DEV_ID_57C6 0x0357 @@ -94,6 +96,10 @@ #define IPR_SUBS_DEV_ID_574D 0x0356 #define IPR_SUBS_DEV_ID_57C8 0x035D +#define IPR_SUBS_DEV_ID_57D5 0x03FB +#define IPR_SUBS_DEV_ID_57D6 0x03FC +#define IPR_SUBS_DEV_ID_57D7 0x03FF +#define IPR_SUBS_DEV_ID_57D8 0x03FE #define IPR_NAME "ipr" /* @@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR) * Misc literals */ #define IPR_NUM_IOADL_ENTRIES IPR_MAX_SGLIST +#define IPR_MAX_MSIX_VECTORS 0x5 +#define IPR_MAX_HRRQ_NUM 0x10 +#define IPR_INIT_HRRQ 0x0 /* * Adapter interface types @@ -404,7 +413,7 @@ struct ipr_config_table_entry64 { __be64 dev_id; __be64 lun; __be64 lun_wwn[2]; -#define IPR_MAX_RES_PATH_LENGTH 24 +#define IPR_MAX_RES_PATH_LENGTH 48 __be64 res_path; struct ipr_std_inq_data std_inq_data; u8 reserved2[4]; @@ -459,9 +468,39 @@ struct ipr_supported_device { u8 reserved2[16]; }__attribute__((packed, aligned (4))); +struct ipr_hrr_queue { + struct ipr_ioa_cfg *ioa_cfg; + __be32 *host_rrq; + dma_addr_t host_rrq_dma; +#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc +#define IPR_HRRQ_RESP_BIT_SET 0x00000002 +#define IPR_HRRQ_TOGGLE_BIT 0x00000001 +#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 +#define IPR_ID_HRRQ_SELE_ENABLE 0x02 + volatile __be32 *hrrq_start; + volatile __be32 *hrrq_end; + volatile __be32 *hrrq_curr; + + struct list_head hrrq_free_q; + struct list_head hrrq_pending_q; + spinlock_t _lock; + spinlock_t *lock; + + volatile u32 toggle_bit; + u32 size; + u32 min_cmd_id; + u32 max_cmd_id; + u8 allow_interrupts:1; + u8 ioa_is_dead:1; + u8 allow_cmds:1; + + struct blk_iopoll iopoll; +}; + /* Command packet structure */ struct ipr_cmd_pkt { - __be16 reserved; /* Reserved by IOA */ + u8 reserved; /* Reserved by IOA */ + u8 hrrq_id; u8 request_type; #define IPR_RQTYPE_SCSICDB 0x00 #define IPR_RQTYPE_IOACMD 0x01 @@ -1022,6 +1061,10 @@ struct ipr_hostrcb64_fabric_desc { struct ipr_hostrcb64_config_element elem[1]; }__attribute__((packed, aligned (8))); +#define for_each_hrrq(hrrq, ioa_cfg) \ + for (hrrq = (ioa_cfg)->hrrq; \ + hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++) + #define for_each_fabric_cfg(fabric, cfg) \ for (cfg = (fabric)->elem; \ cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \ @@ -1308,6 +1351,7 @@ struct ipr_chip_cfg_t { u16 max_cmds; u8 cache_line_size; u8 clear_isr; + u32 iopoll_weight; struct ipr_interrupt_offsets regs; }; @@ -1317,6 +1361,7 @@ struct ipr_chip_t { u16 intr_type; #define IPR_USE_LSI 0x00 #define IPR_USE_MSI 0x01 +#define IPR_USE_MSIX 0x02 u16 sis_type; #define IPR_SIS32 0x00 #define IPR_SIS64 0x01 @@ -1375,13 +1420,10 @@ struct ipr_ioa_cfg { struct list_head queue; - u8 allow_interrupts:1; u8 in_reset_reload:1; u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; - u8 ioa_is_dead:1; u8 dump_taken:1; - u8 allow_cmds:1; u8 allow_ml_add_del:1; u8 needs_hard_reset:1; u8 dual_raid:1; @@ -1413,21 +1455,7 @@ struct ipr_ioa_cfg { char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" struct ipr_trace_entry *trace; - u32 trace_index:IPR_NUM_TRACE_INDEX_BITS; - - /* - * Queue for free command blocks - */ - char ipr_free_label[8]; -#define IPR_FREEQ_LABEL "free-q" - struct list_head free_q; - - /* - * Queue for command blocks outstanding to the adapter - */ - char ipr_pending_label[8]; -#define IPR_PENDQ_LABEL "pend-q" - struct list_head pending_q; + atomic_t trace_index; char cfg_table_start[8]; #define IPR_CFG_TBL_START "cfg" @@ -1452,16 +1480,10 @@ struct ipr_ioa_cfg { struct list_head hostrcb_free_q; struct list_head hostrcb_pending_q; - __be32 *host_rrq; - dma_addr_t host_rrq_dma; -#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc -#define IPR_HRRQ_RESP_BIT_SET 0x00000002 -#define IPR_HRRQ_TOGGLE_BIT 0x00000001 -#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 - volatile __be32 *hrrq_start; - volatile __be32 *hrrq_end; - volatile __be32 *hrrq_curr; - volatile u32 toggle_bit; + struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM]; + u32 hrrq_num; + atomic_t hrrq_index; + u16 identify_hrrq_index; struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES]; @@ -1507,6 +1529,17 @@ struct ipr_ioa_cfg { u32 max_cmds; struct ipr_cmnd **ipr_cmnd_list; dma_addr_t *ipr_cmnd_list_dma; + + u16 intr_flag; + unsigned int nvectors; + + struct { + unsigned short vec; + char desc[22]; + } vectors_info[IPR_MAX_MSIX_VECTORS]; + + u32 iopoll_weight; + }; /* struct ipr_ioa_cfg */ struct ipr_cmnd { @@ -1544,6 +1577,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; @@ -1717,7 +1751,8 @@ struct ipr_ucode_image_header { if (ipr_is_device(hostrcb)) { \ if ((hostrcb)->ioa_cfg->sis64) { \ printk(KERN_ERR IPR_NAME ": %s: " fmt, \ - ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \ + ipr_format_res_path(hostrcb->ioa_cfg, \ + hostrcb->hcam.u.error64.fd_res_path, \ hostrcb->rp_buffer, \ sizeof(hostrcb->rp_buffer)), \ __VA_ARGS__); \ diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index df4c13a5534c..7706c99ec8bb 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -466,11 +466,13 @@ enum intr_type_t { MSIX, }; +#define LPFC_CT_CTX_MAX 64 struct unsol_rcv_ct_ctx { uint32_t ctxt_id; uint32_t SID; - uint32_t flags; -#define UNSOL_VALID 0x00000001 + uint32_t valid; +#define UNSOL_INVALID 0 +#define UNSOL_VALID 1 uint16_t oxid; uint16_t rxid; }; @@ -750,6 +752,15 @@ struct lpfc_hba { void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for PCI BAR2 */ + void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for + PCI BAR0 with dual-ULP support */ + void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for + PCI BAR2 with dual-ULP support */ + void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for + PCI BAR4 with dual-ULP support */ +#define PCI_64BIT_BAR0 0 +#define PCI_64BIT_BAR2 2 +#define PCI_64BIT_BAR4 4 void __iomem *MBslimaddr; /* virtual address for mbox cmds */ void __iomem *HAregaddr; /* virtual address for host attn reg */ void __iomem *CAregaddr; /* virtual address for chip attn reg */ @@ -938,7 +949,7 @@ struct lpfc_hba { spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */ struct list_head ct_ev_waiters; - struct unsol_rcv_ct_ctx ct_ctx[64]; + struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX]; uint32_t ctx_idx; uint8_t menlo_flag; /* menlo generic flags */ diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index f7368eb80415..32d5683e6181 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, spin_lock_irqsave(&phba->ct_ev_lock, flags); if (phba->sli_rev == LPFC_SLI_REV4) { evt_dat->immed_dat = phba->ctx_idx; - phba->ctx_idx = (phba->ctx_idx + 1) % 64; + phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX; /* Provide warning for over-run of the ct_ctx array */ - if (phba->ct_ctx[evt_dat->immed_dat].flags & + if (phba->ct_ctx[evt_dat->immed_dat].valid == UNSOL_VALID) lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2717 CT context array entry " @@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, piocbq->iocb.unsli3.rcvsli3.ox_id; phba->ct_ctx[evt_dat->immed_dat].SID = piocbq->iocb.un.rcvels.remoteID; - phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID; + phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID; } else evt_dat->immed_dat = piocbq->iocb.ulpContext; @@ -1013,6 +1013,47 @@ error_ct_unsol_exit: } /** + * lpfc_bsg_ct_unsol_abort - handler ct abort to management plane + * @phba: Pointer to HBA context object. + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function handles abort to the CT command toward management plane + * for SLI4 port. + * + * If the pending context of a CT command to management plane present, clears + * such context and returns 1 for handled; otherwise, it returns 0 indicating + * no context exists. + **/ +int +lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header fc_hdr; + struct fc_frame_header *fc_hdr_ptr = &fc_hdr; + int ctx_idx, handled = 0; + uint16_t oxid, rxid; + uint32_t sid; + + memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); + sid = sli4_sid_from_fc_hdr(fc_hdr_ptr); + oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id); + rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id); + + for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) { + if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID) + continue; + if (phba->ct_ctx[ctx_idx].rxid != rxid) + continue; + if (phba->ct_ctx[ctx_idx].oxid != oxid) + continue; + if (phba->ct_ctx[ctx_idx].SID != sid) + continue; + phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID; + handled = 1; + } + return handled; +} + +/** * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command * @job: SET_EVENT fc_bsg_job **/ @@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, icmd->ulpClass = CLASS3; if (phba->sli_rev == LPFC_SLI_REV4) { /* Do not issue unsol response if oxid not marked as valid */ - if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) { + if (phba->ct_ctx[tag].valid != UNSOL_VALID) { rc = IOCB_ERROR; goto issue_ct_rsp_exit; } @@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; /* The exchange is done, mark the entry as invalid */ - phba->ct_ctx[tag].flags &= ~UNSOL_VALID; + phba->ct_ctx[tag].valid = UNSOL_INVALID; } else icmd->ulpContext = (ushort) tag; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 69d66e3662cb..76ca65dae781 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, - struct lpfc_iocbq *); +int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); void lpfc_fdmi_tmo(unsigned long); @@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *); int lpfc_bsg_timeout(struct fc_bsg_job *); int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); +int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 65f9fb6862e6..7bff3a19af56 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, } /** - * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort + * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler * @phba: Pointer to HBA context object. - * @pring: Pointer to the driver internal I/O ring. - * @piocbq: Pointer to the IOCBQ. + * @dmabuf: pointer to a dmabuf that describes the FC sequence * - * This function serves as the default handler for the sli4 unsolicited - * abort event. It shall be invoked when there is no application interface - * registered unsolicited abort handler. This handler does nothing but - * just simply releases the dma buffer used by the unsol abort event. + * This function serves as the upper level protocol abort handler for CT + * protocol. + * + * Return 1 if abort has been handled, 0 otherwise. **/ -void -lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocbq) +int +lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) { - IOCB_t *icmd = &piocbq->iocb; - struct lpfc_dmabuf *bdeBuf; - uint32_t size; + int handled; - /* Forward abort event to any process registered to receive ct event */ - if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0) - return; + /* CT upper level goes through BSG */ + handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf); - /* If there is no BDE associated with IOCB, there is nothing to do */ - if (icmd->ulpBdeCount == 0) - return; - bdeBuf = piocbq->context2; - piocbq->context2 = NULL; - size = icmd->un.cont64[0].tus.f.bdeSize; - lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size); - lpfc_in_buf_free(phba, bdeBuf); + return handled; } static void diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index b9440deaad45..08d156a9094f 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: + if (cmd == ELS_CMD_PLOGI && + did == NameServer_DID) { + /* Continue forever if plogi to */ + /* the nameserver fails */ + maxretry = 0; + delay = 100; + } retry = 1; break; } @@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_nodelist *ndlp; struct ls_rjt stat; uint32_t *payload; - uint32_t cmd, did, newnode, rjt_err = 0; + uint32_t cmd, did, newnode; + uint8_t rjt_exp, rjt_err = 0; IOCB_t *icmd = &elsiocb->iocb; if (!vport || !(elsiocb->context2)) @@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* If Nport discovery is delayed, reject PLOGIs */ if (vport->fc_flag & FC_DISC_DELAYED) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } if (vport->port_state < LPFC_DISC_AUTH) { if (!(phba->pport->fc_flag & FC_PT2PT) || (phba->pport->fc_flag & FC_PT2PT_PLOGI)) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } /* We get here, and drop thru, if we are PT2PT with @@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); @@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); @@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvADISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPDISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPRLI++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); @@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (newnode) lpfc_nlp_put(ndlp); break; + case ELS_CMD_REC: + /* receive this due to exchange closed */ + rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_INVALID_OX_RX; + break; default: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", @@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Unsupported ELS command, reject */ rjt_err = LSRJT_CMD_UNSUPPORTED; + rjt_exp = LSEXP_NOTHING_MORE; /* Unknown ELS command <elsCmd> received from NPORT <did> */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (rjt_err) { memset(&stat, 0, sizeof(stat)); stat.un.b.lsRjtRsnCode = rjt_err; - stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + stat.un.b.lsRjtRsnCodeExp = rjt_exp; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, NULL); } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 7398ca862e97..e8c476031703 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -538,6 +538,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10000000 #define ELS_CMD_TEST 0x11000000 #define ELS_CMD_RRQ 0x12000000 +#define ELS_CMD_REC 0x13000000 #define ELS_CMD_PRLI 0x20100014 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 @@ -574,6 +575,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10 #define ELS_CMD_TEST 0x11 #define ELS_CMD_RRQ 0x12 +#define ELS_CMD_REC 0x13 #define ELS_CMD_PRLI 0x14001020 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index a47cfbdd05f2..6e93b886cd4d 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -106,6 +106,7 @@ struct lpfc_sli_intf { #define LPFC_SLI4_MB_WORD_COUNT 64 #define LPFC_MAX_MQ_PAGE 8 +#define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -703,24 +704,41 @@ struct lpfc_register { * BAR0. The offsets are the same so the driver must account for * any base address difference. */ -#define LPFC_RQ_DOORBELL 0x00A0 -#define lpfc_rq_doorbell_num_posted_SHIFT 16 -#define lpfc_rq_doorbell_num_posted_MASK 0x3FFF -#define lpfc_rq_doorbell_num_posted_WORD word0 -#define lpfc_rq_doorbell_id_SHIFT 0 -#define lpfc_rq_doorbell_id_MASK 0xFFFF -#define lpfc_rq_doorbell_id_WORD word0 - -#define LPFC_WQ_DOORBELL 0x0040 -#define lpfc_wq_doorbell_num_posted_SHIFT 24 -#define lpfc_wq_doorbell_num_posted_MASK 0x00FF -#define lpfc_wq_doorbell_num_posted_WORD word0 -#define lpfc_wq_doorbell_index_SHIFT 16 -#define lpfc_wq_doorbell_index_MASK 0x00FF -#define lpfc_wq_doorbell_index_WORD word0 -#define lpfc_wq_doorbell_id_SHIFT 0 -#define lpfc_wq_doorbell_id_MASK 0xFFFF -#define lpfc_wq_doorbell_id_WORD word0 +#define LPFC_ULP0_RQ_DOORBELL 0x00A0 +#define LPFC_ULP1_RQ_DOORBELL 0x00C0 +#define lpfc_rq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_rq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_rq_db_list_fm_num_posted_WORD word0 +#define lpfc_rq_db_list_fm_index_SHIFT 16 +#define lpfc_rq_db_list_fm_index_MASK 0x00FF +#define lpfc_rq_db_list_fm_index_WORD word0 +#define lpfc_rq_db_list_fm_id_SHIFT 0 +#define lpfc_rq_db_list_fm_id_MASK 0xFFFF +#define lpfc_rq_db_list_fm_id_WORD word0 +#define lpfc_rq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_rq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_rq_db_ring_fm_num_posted_WORD word0 +#define lpfc_rq_db_ring_fm_id_SHIFT 0 +#define lpfc_rq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_rq_db_ring_fm_id_WORD word0 + +#define LPFC_ULP0_WQ_DOORBELL 0x0040 +#define LPFC_ULP1_WQ_DOORBELL 0x0060 +#define lpfc_wq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_wq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_wq_db_list_fm_num_posted_WORD word0 +#define lpfc_wq_db_list_fm_index_SHIFT 16 +#define lpfc_wq_db_list_fm_index_MASK 0x00FF +#define lpfc_wq_db_list_fm_index_WORD word0 +#define lpfc_wq_db_list_fm_id_SHIFT 0 +#define lpfc_wq_db_list_fm_id_MASK 0xFFFF +#define lpfc_wq_db_list_fm_id_WORD word0 +#define lpfc_wq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_wq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_wq_db_ring_fm_num_posted_WORD word0 +#define lpfc_wq_db_ring_fm_id_SHIFT 0 +#define lpfc_wq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_wq_db_ring_fm_id_WORD word0 #define LPFC_EQCQ_DOORBELL 0x0120 #define lpfc_eqcq_doorbell_se_SHIFT 31 @@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create { struct { /* Version 0 Request */ uint32_t word0; #define lpfc_mbx_wq_create_num_pages_SHIFT 0 -#define lpfc_mbx_wq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_num_pages_MASK 0x000000FF #define lpfc_mbx_wq_create_num_pages_WORD word0 +#define lpfc_mbx_wq_create_dua_SHIFT 8 +#define lpfc_mbx_wq_create_dua_MASK 0x00000001 +#define lpfc_mbx_wq_create_dua_WORD word0 #define lpfc_mbx_wq_create_cq_id_SHIFT 16 #define lpfc_mbx_wq_create_cq_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_cq_id_WORD word0 - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_WQ_PAGE_V0]; + uint32_t word9; +#define lpfc_mbx_wq_create_bua_SHIFT 0 +#define lpfc_mbx_wq_create_bua_MASK 0x00000001 +#define lpfc_mbx_wq_create_bua_WORD word9 +#define lpfc_mbx_wq_create_ulp_num_SHIFT 8 +#define lpfc_mbx_wq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_wq_create_ulp_num_WORD word9 } request; struct { /* Version 1 Request */ uint32_t word0; /* Word 0 is the same as in v0 */ @@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_q_id_SHIFT 0 #define lpfc_mbx_wq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_wq_create_bar_set_SHIFT 0 +#define lpfc_mbx_wq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_bar_set_WORD word2 +#define WQ_PCI_BAR_0_AND_1 0x00 +#define WQ_PCI_BAR_2_AND_3 0x01 +#define WQ_PCI_BAR_4_AND_5 0x02 +#define lpfc_mbx_wq_create_db_format_SHIFT 16 +#define lpfc_mbx_wq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_db_format_WORD word2 } response; } u; }; @@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_num_pages_SHIFT 0 #define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF #define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; struct dma_address page[LPFC_MAX_WQ_PAGE]; } request; struct { uint32_t word0; -#define lpfc_mbx_rq_create_q_id_SHIFT 0 -#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF -#define lpfc_mbx_rq_create_q_id_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 } response; } u; }; @@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info { } u; }; +struct lpfc_mbx_query_fw_config { + struct mbox_header header; + struct { + uint32_t config_number; +#define LPFC_FC_FCOE 0x00000007 + uint32_t asic_revision; + uint32_t physical_port; + uint32_t function_mode; +#define LPFC_FCOE_INI_MODE 0x00000040 +#define LPFC_FCOE_TGT_MODE 0x00000080 +#define LPFC_DUA_MODE 0x00000800 + uint32_t ulp0_mode; +#define LPFC_ULP_FCOE_INIT_MODE 0x00000040 +#define LPFC_ULP_FCOE_TGT_MODE 0x00000080 + uint32_t ulp0_nap_words[12]; + uint32_t ulp1_mode; + uint32_t ulp1_nap_words[12]; + uint32_t function_capabilities; + uint32_t cqid_base; + uint32_t cqid_tot; + uint32_t eqid_base; + uint32_t eqid_tot; + uint32_t ulp0_nap2_words[2]; + uint32_t ulp1_nap2_words[2]; + } rsp; +}; + struct lpfc_id_range { uint32_t word5; #define lpfc_mbx_rsrc_id_word4_0_SHIFT 0 @@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl { #define lpfc_mbx_redisc_fcf_index_WORD word12 }; -struct lpfc_mbx_query_fw_cfg { - struct mbox_header header; - uint32_t config_number; - uint32_t asic_rev; - uint32_t phys_port; - uint32_t function_mode; -/* firmware Function Mode */ -#define lpfc_function_mode_toe_SHIFT 0 -#define lpfc_function_mode_toe_MASK 0x00000001 -#define lpfc_function_mode_toe_WORD function_mode -#define lpfc_function_mode_nic_SHIFT 1 -#define lpfc_function_mode_nic_MASK 0x00000001 -#define lpfc_function_mode_nic_WORD function_mode -#define lpfc_function_mode_rdma_SHIFT 2 -#define lpfc_function_mode_rdma_MASK 0x00000001 -#define lpfc_function_mode_rdma_WORD function_mode -#define lpfc_function_mode_vm_SHIFT 3 -#define lpfc_function_mode_vm_MASK 0x00000001 -#define lpfc_function_mode_vm_WORD function_mode -#define lpfc_function_mode_iscsi_i_SHIFT 4 -#define lpfc_function_mode_iscsi_i_MASK 0x00000001 -#define lpfc_function_mode_iscsi_i_WORD function_mode -#define lpfc_function_mode_iscsi_t_SHIFT 5 -#define lpfc_function_mode_iscsi_t_MASK 0x00000001 -#define lpfc_function_mode_iscsi_t_WORD function_mode -#define lpfc_function_mode_fcoe_i_SHIFT 6 -#define lpfc_function_mode_fcoe_i_MASK 0x00000001 -#define lpfc_function_mode_fcoe_i_WORD function_mode -#define lpfc_function_mode_fcoe_t_SHIFT 7 -#define lpfc_function_mode_fcoe_t_MASK 0x00000001 -#define lpfc_function_mode_fcoe_t_WORD function_mode -#define lpfc_function_mode_dal_SHIFT 8 -#define lpfc_function_mode_dal_MASK 0x00000001 -#define lpfc_function_mode_dal_WORD function_mode -#define lpfc_function_mode_lro_SHIFT 9 -#define lpfc_function_mode_lro_MASK 0x00000001 -#define lpfc_function_mode_lro_WORD function_mode -#define lpfc_function_mode_flex10_SHIFT 10 -#define lpfc_function_mode_flex10_MASK 0x00000001 -#define lpfc_function_mode_flex10_WORD function_mode -#define lpfc_function_mode_ncsi_SHIFT 11 -#define lpfc_function_mode_ncsi_MASK 0x00000001 -#define lpfc_function_mode_ncsi_WORD function_mode -}; - /* Status field for embedded SLI_CONFIG mailbox command */ #define STATUS_SUCCESS 0x0 #define STATUS_FAILED 0x1 @@ -2965,7 +3003,7 @@ struct lpfc_mqe { struct lpfc_mbx_read_config rd_config; struct lpfc_mbx_request_features req_ftrs; struct lpfc_mbx_post_hdr_tmpl hdr_tmpl; - struct lpfc_mbx_query_fw_cfg query_fw_cfg; + struct lpfc_mbx_query_fw_config query_fw_cfg; struct lpfc_mbx_supp_pages supp_pages; struct lpfc_mbx_pc_sli4_params sli4_params; struct lpfc_mbx_get_sli4_parameters get_sli4_parameters; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 89ad55807012..314b4f61b9e3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3165,14 +3165,10 @@ destroy_port(struct lpfc_vport *vport) int lpfc_get_instance(void) { - int instance = 0; + int ret; - /* Assign an unused number */ - if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL)) - return -1; - if (idr_get_new(&lpfc_hba_index, NULL, &instance)) - return -1; - return instance; + ret = idr_alloc(&lpfc_hba_index, NULL, 0, 0, GFP_KERNEL); + return ret < 0 ? -1 : ret; } /** @@ -6233,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type) phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_SEM_OFFSET; phba->sli4_hba.RQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_RQ_DOORBELL; phba->sli4_hba.WQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_WQ_DOORBELL; phba->sli4_hba.EQCQDBregaddr = phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL; phba->sli4_hba.MQDBregaddr = @@ -6289,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) return -ENODEV; phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_RQ_DOORBELL); phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_WQ_DOORBELL); phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL); phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + @@ -6987,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) phba->sli4_hba.fcp_wq = NULL; } + if (phba->pci_bar0_memmap_p) { + iounmap(phba->pci_bar0_memmap_p); + phba->pci_bar0_memmap_p = NULL; + } + if (phba->pci_bar2_memmap_p) { + iounmap(phba->pci_bar2_memmap_p); + phba->pci_bar2_memmap_p = NULL; + } + if (phba->pci_bar4_memmap_p) { + iounmap(phba->pci_bar4_memmap_p); + phba->pci_bar4_memmap_p = NULL; + } + /* Release FCP CQ mapping array */ if (phba->sli4_hba.fcp_cq_map != NULL) { kfree(phba->sli4_hba.fcp_cq_map); @@ -7050,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) int rc = -ENOMEM; int fcp_eqidx, fcp_cqidx, fcp_wqidx; int fcp_cq_index = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + LPFC_MBOXQ_t *mboxq; + uint32_t length; + + /* Check for dual-ULP support */ + mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mboxq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3249 Unable to allocate memory for " + "QUERY_FW_CFG mailbox command\n"); + return -ENOMEM; + } + length = (sizeof(struct lpfc_mbx_query_fw_config) - + sizeof(struct lpfc_sli4_cfg_mhdr)); + lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, + LPFC_MBOX_OPCODE_QUERY_FW_CFG, + length, LPFC_SLI4_MBX_EMBED); + + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + + shdr = (union lpfc_sli4_cfg_shdr *) + &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3250 QUERY_FW_CFG mailbox failed with status " + "x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); + rc = -ENXIO; + goto out_error; + } + + phba->sli4_hba.fw_func_mode = + mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode; + phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode; + phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, " + "ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode, + phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode); + + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); /* * Set up HBA Event Queues (EQs) @@ -7664,78 +7724,6 @@ out: } /** - * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands - * @phba: pointer to lpfc hba data structure. - * @cnt: number of nop mailbox commands to send. - * - * This routine is invoked to send a number @cnt of NOP mailbox command and - * wait for each command to complete. - * - * Return: the number of NOP mailbox command completed. - **/ -static int -lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt) -{ - LPFC_MBOXQ_t *mboxq; - int length, cmdsent; - uint32_t mbox_tmo; - uint32_t rc = 0; - uint32_t shdr_status, shdr_add_status; - union lpfc_sli4_cfg_shdr *shdr; - - if (cnt == 0) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2518 Requested to send 0 NOP mailbox cmd\n"); - return cnt; - } - - mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mboxq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2519 Unable to allocate memory for issuing " - "NOP mailbox command\n"); - return 0; - } - - /* Set up NOP SLI4_CONFIG mailbox-ioctl command */ - length = (sizeof(struct lpfc_mbx_nop) - - sizeof(struct lpfc_sli4_cfg_mhdr)); - - for (cmdsent = 0; cmdsent < cnt; cmdsent++) { - lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, - LPFC_MBOX_OPCODE_NOP, length, - LPFC_SLI4_MBX_EMBED); - if (!phba->sli4_hba.intr_enable) - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - else { - mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq); - rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo); - } - if (rc == MBX_TIMEOUT) - break; - /* Check return status */ - shdr = (union lpfc_sli4_cfg_shdr *) - &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; - shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); - shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, - &shdr->response); - if (shdr_status || shdr_add_status || rc) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2520 NOP mailbox command failed " - "status x%x add_status x%x mbx " - "status x%x\n", shdr_status, - shdr_add_status, rc); - break; - } - } - - if (rc != MBX_TIMEOUT) - mempool_free(mboxq, phba->mbox_mem_pool); - - return cmdsent; -} - -/** * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space. * @phba: pointer to lpfc hba data structure. * @@ -8503,37 +8491,6 @@ lpfc_unset_hba(struct lpfc_hba *phba) } /** - * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to unset the HBA device initialization steps to - * a device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_unset_hba(struct lpfc_hba *phba) -{ - struct lpfc_vport *vport = phba->pport; - struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - - spin_lock_irq(shost->host_lock); - vport->load_flag |= FC_UNLOADING; - spin_unlock_irq(shost->host_lock); - - phba->pport->work_port_events = 0; - - /* Stop the SLI4 device port */ - lpfc_stop_port(phba); - - lpfc_sli4_disable_intr(phba); - - /* Reset SLI4 HBA FCoE function */ - lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); - - return; -} - -/** * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy * @phba: Pointer to HBA context object. * @@ -9595,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error, ret; uint32_t cfg_mode, intr_mode; - int mcnt; int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ @@ -9684,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */ /* Now, trying to enable interrupt and bring up the device */ cfg_mode = phba->cfg_use_msi; - while (true) { - /* Put device to a known state before enabling interrupt */ - lpfc_stop_port(phba); - /* Configure and enable interrupt */ - intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); - if (intr_mode == LPFC_INTR_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0426 Failed to enable interrupt.\n"); - error = -ENODEV; - goto out_free_sysfs_attr; - } - /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; - /* Set up SLI-4 HBA */ - if (lpfc_sli4_hba_setup(phba)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1421 Failed to set up hba\n"); - error = -ENODEV; - goto out_disable_intr; - } - /* Send NOP mbx cmds for non-INTx mode active interrupt test */ - if (intr_mode != 0) - mcnt = lpfc_sli4_send_nop_mbox_cmds(phba, - LPFC_ACT_INTR_CNT); - - /* Check active interrupts received only for MSI/MSI-X */ - if (intr_mode == 0 || - phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) { - /* Log the current active interrupt mode */ - phba->intr_mode = intr_mode; - lpfc_log_intr_mode(phba, intr_mode); - break; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0451 Configure interrupt mode (%d) " - "failed active interrupt test.\n", - intr_mode); - /* Unset the previous SLI-4 HBA setup. */ - /* - * TODO: Is this operation compatible with IF TYPE 2 - * devices? All port state is deleted and cleared. - */ - lpfc_sli4_unset_hba(phba); - /* Try next level of interrupt mode */ - cfg_mode = --intr_mode; + /* Put device to a known state before enabling interrupt */ + lpfc_stop_port(phba); + /* Configure and enable interrupt */ + intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); + if (intr_mode == LPFC_INTR_ERROR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0426 Failed to enable interrupt.\n"); + error = -ENODEV; + goto out_free_sysfs_attr; + } + /* Default to single EQ for non-MSI-X */ + if (phba->intr_type != MSIX) + adjusted_fcp_io_channel = 1; + else + adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; + phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + /* Set up SLI-4 HBA */ + if (lpfc_sli4_hba_setup(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1421 Failed to set up hba\n"); + error = -ENODEV; + goto out_disable_intr; } + /* Log the current active interrupt mode */ + phba->intr_mode = intr_mode; + lpfc_log_intr_mode(phba, intr_mode); + /* Perform post initialization setup */ lpfc_post_init_setup(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index d8fadcb2db73..46128c679202 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1115,6 +1115,13 @@ out: "0261 Cannot Register NameServer login\n"); } + /* + ** In case the node reference counter does not go to zero, ensure that + ** the stale state for the node is not processed. + */ + + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; spin_unlock_irq(shost->host_lock); @@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; + spin_unlock_irq(shost->host_lock); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60e5a177644c..98af07c6e300 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -288,6 +288,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) } /** + * lpfc_change_queue_type() - Change a device's scsi tag queuing type + * @sdev: Pointer the scsi device whose queue depth is to change + * @tag_type: Identifier for queue tag type + */ +static int +lpfc_change_queue_type(struct scsi_device *sdev, int tag_type) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag_type); + if (tag_type) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag_type = 0; + + return tag_type; +} + +/** * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread * @phba: The Hba for which this call is being executed. * @@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, break; } } else - fcp_cmnd->fcpCntl1 = 0; + fcp_cmnd->fcpCntl1 = SIMPLE_Q; sli4 = (phba->sli_rev == LPFC_SLI_REV4); @@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = { .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; struct scsi_host_template lpfc_vport_template = { @@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = { .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 624eab370396..55b6fc83ad71 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) /* Ring Doorbell */ doorbell.word0 = 0; - bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1); - bf_set(lpfc_wq_doorbell_index, &doorbell, host_index); - bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id); - writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr); + if (q->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); + bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + } else if (q->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, q->db_regaddr); return 0; } @@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { doorbell.word0 = 0; - bf_set(lpfc_rq_doorbell_num_posted, &doorbell, - hq->entry_repost); - bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id); - writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr); + if (hq->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id); + } else if (hq->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_list_fm_index, &doorbell, + hq->host_index); + bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, hq->db_regaddr); } return put_index; } @@ -4939,7 +4956,7 @@ out_free_mboxq: static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - uint8_t fcp_eqidx; + int fcp_eqidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); @@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) } /* RPIs. */ count = phba->sli4_hba.max_cfg_param.max_rpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3279 Invalid provisioning of " + "rpi:%d\n", count); + rc = -EINVAL; + goto err_exit; + } base = phba->sli4_hba.max_cfg_param.rpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.rpi_bmask = kzalloc(longs * @@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VPIs. */ count = phba->sli4_hba.max_cfg_param.max_vpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3280 Invalid provisioning of " + "vpi:%d\n", count); + rc = -EINVAL; + goto free_rpi_ids; + } base = phba->sli4_hba.max_cfg_param.vpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->vpi_bmask = kzalloc(longs * @@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* XRIs. */ count = phba->sli4_hba.max_cfg_param.max_xri; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3281 Invalid provisioning of " + "xri:%d\n", count); + rc = -EINVAL; + goto free_vpi_ids; + } base = phba->sli4_hba.max_cfg_param.xri_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.xri_bmask = kzalloc(longs * @@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VFIs. */ count = phba->sli4_hba.max_cfg_param.max_vfi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3282 Invalid provisioning of " + "vfi:%d\n", count); + rc = -EINVAL; + goto free_xri_ids; + } base = phba->sli4_hba.max_cfg_param.vfi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.vfi_bmask = kzalloc(longs * @@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, * This is a continuation of a commandi,(CX) so this * sglq is on the active list */ - sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag); + sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag); if (!sglq) return IOCB_ERROR; } @@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba) pring->prt[3].type = FC_TYPE_CT; pring->prt[3].lpfc_sli_rcv_unsol_event = lpfc_ct_unsol_event; - /* abort unsolicited sequence */ - pring->prt[4].profile = 0; /* Mask 4 */ - pring->prt[4].rctl = FC_RCTL_BA_ABTS; - pring->prt[4].type = FC_TYPE_BLS; - pring->prt[4].lpfc_sli_rcv_unsol_event = - lpfc_sli4_ct_abort_unsol_event; break; } totiocbsize += (pring->sli.sli3.numCiocb * @@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; @@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12097,6 +12136,54 @@ out_fail: } /** + * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory + * @phba: HBA structure that indicates port to create a queue on. + * @pci_barset: PCI BAR set flag. + * + * This function shall perform iomap of the specified PCI BAR address to host + * memory address if not already done so and return it. The returned host + * memory address can be NULL. + */ +static void __iomem * +lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) +{ + struct pci_dev *pdev; + unsigned long bar_map, bar_map_len; + + if (!phba->pcidev) + return NULL; + else + pdev = phba->pcidev; + + switch (pci_barset) { + case WQ_PCI_BAR_0_AND_1: + if (!phba->pci_bar0_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); + phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar0_memmap_p; + case WQ_PCI_BAR_2_AND_3: + if (!phba->pci_bar2_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); + phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar2_memmap_p; + case WQ_PCI_BAR_4_AND_5: + if (!phba->pci_bar4_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar4_memmap_p; + default: + break; + } + return NULL; +} + +/** * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify @@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; struct dma_address *page; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!wq || !cq) @@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, cq->queue_id); bf_set(lpfc_mbox_hdr_version, &shdr->request, phba->sli4_hba.pc_sli4_params.wqv); + if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); @@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys); page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, status = -ENXIO; goto out; } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, + &wq_create->u.response); + if ((wq->db_format != LPFC_DB_LIST_FORMAT) && + (wq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3265 WQ[%d] doorbell format not " + "supported: x%x\n", wq->queue_id, + wq->db_format); + status = -EINVAL; + goto out; + } + pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, + &wq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3263 WQ[%d] failed to memmap pci " + "barset:x%x\n", wq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && + (db_offset != LPFC_ULP1_WQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3252 WQ[%d] doorbell offset not " + "supported: x%x\n", wq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + wq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3264 WQ[%d]: barset:x%x, offset:x%x\n", + wq->queue_id, pci_barset, db_offset); + } else { + wq->db_format = LPFC_DB_LIST_FORMAT; + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!hrq || !drq || !cq) @@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -ENXIO; goto out; } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format, + &rq_create->u.response); + if ((hrq->db_format != LPFC_DB_LIST_FORMAT) && + (hrq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3262 RQ [%d] doorbell format not " + "supported: x%x\n", hrq->queue_id, + hrq->db_format); + status = -EINVAL; + goto out; + } + + pci_barset = bf_get(lpfc_mbx_rq_create_bar_set, + &rq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3269 RQ[%d] failed to memmap pci " + "barset:x%x\n", hrq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + + db_offset = rq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_RQ_DOORBELL) && + (db_offset != LPFC_ULP1_RQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3270 RQ[%d] doorbell offset not " + "supported: x%x\n", hrq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + hrq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3266 RQ[qid:%d]: barset:x%x, offset:x%x\n", + hrq->queue_id, pci_barset, db_offset); + } else { + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + } hrq->type = LPFC_HRQ; hrq->assoc_qid = cq->queue_id; hrq->subtype = subtype; @@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr; @@ -14063,6 +14251,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport, } /** + * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp + * @vport: pointer to a vitural port + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function tries to abort from the assembed sequence from upper level + * protocol, described by the information from basic abbort @dmabuf. It + * checks to see whether such pending context exists at upper level protocol. + * If so, it shall clean up the pending context. + * + * Return + * true -- if there is matching pending context of the sequence cleaned + * at ulp; + * false -- if there is no matching pending context of the sequence present + * at ulp. + **/ +static bool +lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf) +{ + struct lpfc_hba *phba = vport->phba; + int handled; + + /* Accepting abort at ulp with SLI4 only */ + if (phba->sli_rev < LPFC_SLI_REV4) + return false; + + /* Register all caring upper level protocols to attend abort */ + handled = lpfc_ct_handle_unsol_abort(phba, dmabuf); + if (handled) + return true; + + return false; +} + +/** * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler * @phba: Pointer to HBA context object. * @cmd_iocbq: pointer to the command iocbq structure. @@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmd_iocbq, struct lpfc_iocbq *rsp_iocbq) { - if (cmd_iocbq) + struct lpfc_nodelist *ndlp; + + if (cmd_iocbq) { + ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1; + lpfc_nlp_put(ndlp); + lpfc_nlp_not_used(ndlp); lpfc_sli_release_iocbq(phba, cmd_iocbq); + } /* Failure means BLS ABORT RSP did not get delivered to remote node*/ if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus) @@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * event after aborting the sequence handling. **/ static void -lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, - struct fc_frame_header *fc_hdr) +lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *ctiocb = NULL; struct lpfc_nodelist *ndlp; uint16_t oxid, rxid, xri, lxri; @@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, oxid = be16_to_cpu(fc_hdr->fh_ox_id); rxid = be16_to_cpu(fc_hdr->fh_rx_id); - ndlp = lpfc_findnode_did(phba->pport, sid); + ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, - "1268 Find ndlp returned NULL for oxid:x%x " - "SID:x%x\n", oxid, sid); - return; + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "1268 Failed to allocate ndlp for " + "oxid:x%x SID:x%x\n", oxid, sid); + return; + } + lpfc_nlp_init(vport, ndlp, sid); + /* Put ndlp onto pport node list */ + lpfc_enqueue_node(vport, ndlp); + } else if (!NLP_CHK_NODE_ACT(ndlp)) { + /* re-setup ndlp without removing from node list */ + ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "3275 Failed to active ndlp found " + "for oxid:x%x SID:x%x\n", oxid, sid); + return; + } } /* Allocate buffer for rsp iocb */ @@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, icmd->ulpLe = 1; icmd->ulpClass = CLASS3; icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; - ctiocb->context1 = ndlp; + ctiocb->context1 = lpfc_nlp_get(ndlp); ctiocb->iocb_cmpl = NULL; ctiocb->vport = phba->pport; @@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, if (lxri != NO_XRI) lpfc_set_rrq_active(phba, ndlp, lxri, (xri == oxid) ? rxid : oxid, 0); - /* If the oxid maps to the FCP XRI range or if it is out of range, - * send a BLS_RJT. The driver no longer has that exchange. - * Override the IOCB for a BA_RJT. + /* For BA_ABTS from exchange responder, if the logical xri with + * the oxid maps to the FCP XRI range, the port no longer has + * that exchange context, send a BLS_RJT. Override the IOCB for + * a BA_RJT. */ - if (xri > (phba->sli4_hba.max_cfg_param.max_xri + - phba->sli4_hba.max_cfg_param.xri_base) || - xri > (lpfc_sli4_get_els_iocb_cnt(phba) + - phba->sli4_hba.max_cfg_param.xri_base)) { + if ((fctl & FC_FC_EX_CTX) && + (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; + bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); + bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); + bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE); + } + + /* If BA_ABTS failed to abort a partially assembled receive sequence, + * the driver no longer has that exchange, send a BLS_RJT. Override + * the IOCB for a BA_RJT. + */ + if (aborted == false) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid); /* Xmit CT abts response on exchange <xid> */ - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0); if (rc == IOCB_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "2925 Failed to issue CT ABTS RSP x%x on " - "xri x%x, Data x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, - phba->link_state); + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2925 Failed to issue CT ABTS RSP x%x on " + "xri x%x, Data x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, + phba->link_state); + lpfc_nlp_put(ndlp); + ctiocb->context1 = NULL; lpfc_sli_release_iocbq(phba, ctiocb); } } @@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, struct lpfc_hba *phba = vport->phba; struct fc_frame_header fc_hdr; uint32_t fctl; - bool abts_par; + bool aborted; /* Make a copy of fc_hdr before the dmabuf being released */ memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); fctl = sli4_fctl_from_fc_hdr(&fc_hdr); if (fctl & FC_FC_EX_CTX) { - /* - * ABTS sent by responder to exchange, just free the buffer - */ - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by responder to exchange, no cleanup needed */ + aborted = true; } else { - /* - * ABTS sent by initiator to exchange, need to do cleanup - */ - /* Try to abort partially assembled seq */ - abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf); - - /* Send abort to ULP if partially seq abort failed */ - if (abts_par == false) - lpfc_sli4_send_seq_to_ulp(vport, dmabuf); - else - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by initiator to exchange, need to do cleanup */ + aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf); + if (aborted == false) + aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf); } - /* Send basic accept (BA_ACC) to the abort requester */ - lpfc_sli4_seq_abort_rsp(phba, &fc_hdr); + lpfc_in_buf_free(phba, &dmabuf->dbuf); + + /* Respond with BA_ACC or BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } /** @@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba) { uint16_t next_fcf_index; +initial_priority: /* Search start from next bit of currently registered FCF index */ + next_fcf_index = phba->fcf.current_rec.fcf_indx; + next_priority: - next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) % - LPFC_SLI4_FCF_TBL_INDX_MAX; + /* Determine the next fcf index to check */ + next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX; next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, LPFC_SLI4_FCF_TBL_INDX_MAX, next_fcf_index); @@ -15337,7 +15589,7 @@ next_priority: * at that level and continue the selection process. */ if (lpfc_check_next_fcf_pri_level(phba)) - goto next_priority; + goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 44c427a45d66..be02b59ea279 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -139,6 +139,10 @@ struct lpfc_queue { struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + uint16_t db_format; +#define LPFC_DB_RING_FORMAT 0x01 +#define LPFC_DB_LIST_FORMAT 0x02 + void __iomem *db_regaddr; /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2; @@ -508,6 +512,10 @@ struct lpfc_sli4_hba { struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + uint8_t fw_func_mode; /* FW function protocol mode */ + uint32_t ulp0_mode; /* ULP0 protocol mode */ + uint32_t ulp1_mode; /* ULP1 protocol mode */ + /* Setup information for various queue parameters */ int eq_esize; int eq_ecount; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index ba596e854bbc..f3b7795a296b 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.36" +#define LPFC_DRIVER_VERSION "8.3.37" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index ffd85c511c8e..5e24e7e73714 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work) struct task_struct *p; spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) + if (ioc->shost_recovery || ioc->pci_error_recovery) goto rearm_timer; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); @@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work) printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", ioc->name, __func__); + /* It may be possible that EEH recovery can resolve some of + * pci bus failure issues rather removing the dead ioc function + * by considering controller is in a non-operational state. So + * here priority is given to the EEH recovery. If it doesn't + * not resolve this issue, mpt2sas driver will consider this + * controller to non-operational state and remove the dead ioc + * function. + */ + if (ioc->non_operational_loop++ < 5) { + spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, + flags); + goto rearm_timer; + } + /* * Call _scsih_flush_pending_cmds callback so that we flush all * pending commands back to OS. This call is required to aovid @@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work) return; /* don't rearm timer */ } + ioc->non_operational_loop = 0; + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); @@ -4386,6 +4402,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (missing_delay[0] != -1 && missing_delay[1] != -1) _base_update_missing_delay(ioc, missing_delay[0], missing_delay[1]); + ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 543d8d637479..c6ee7aad7501 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -835,6 +835,7 @@ struct MPT2SAS_ADAPTER { u16 cpu_msix_table_sz; u32 ioc_reset_count; MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; + u32 non_operational_loop; /* internal commands, callback index */ u8 scsi_io_cb_idx; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 04f8010f0770..18360032a520 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, void *sg_local, *chain; u32 chain_offset; u32 chain_length; - u32 chain_flags; int sges_left; u32 sges_in_segment; u8 simple_sgl_flags; @@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, sges_in_segment--; } - /* initializing the chain flags and pointers */ - chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT; + /* initializing the pointers */ chain_req = _base_get_chain_buffer_tracker(ioc, smid); if (!chain_req) return -1; diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index ce7e59b2fc08..1df9ed4f371d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 8af944d7d13d..054d5231c974 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev, spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); memset(&ioc->diag_trigger_mpi, 0, - sizeof(struct SL_WH_EVENT_TRIGGERS_T)); + sizeof(ioc->diag_trigger_mpi)); memcpy(&ioc->diag_trigger_mpi, buf, sz); if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES) ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6421a06c4ce2..dcbf7c880cb2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc, int i; u16 handle; u16 reason_code; - u8 phy_number; for (i = 0; i < event_data->NumEntries; i++) { handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle); if (!handle) continue; - phy_number = event_data->StartPhyNum + i; reason_code = event_data->PHY[i].PhyStatus & MPI2_EVENT_SAS_TOPO_RC_MASK; if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING) diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c index da6c5f25749c..6f8d6213040b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c +++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c index 6e9af20be12f..5d8fe4f75650 100644 --- a/drivers/scsi/qla4xxx/ql4_83xx.c +++ b/drivers/scsi/qla4xxx/ql4_83xx.c @@ -538,7 +538,7 @@ struct device_info { int port_num; }; -static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) { uint32_t drv_active; uint32_t dev_part, dev_part1, dev_part2; @@ -1351,31 +1351,58 @@ exit_start_fw: /*----------------------Interrupt Related functions ---------------------*/ -void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) + qla4_8xxx_intr_disable(ha); +} + +static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int, ret; - if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + ret = readl(&ha->qla4_83xx_reg->mbox_int); + mb_int = ret & ~INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(1, &ha->qla4_83xx_reg->leg_int_mask); + } +} - ret = readl(&ha->qla4_83xx_reg->mbox_int); - mb_int = ret & ~INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(1, &ha->qla4_83xx_reg->leg_int_mask); +void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_disable_mbox_intrs(ha); + qla4_83xx_disable_iocb_intrs(ha); } -void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) { + qla4_8xxx_intr_enable(ha); + set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags); + } +} + +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int; - qla4_8xxx_mbx_intr_enable(ha); - mb_int = INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(0, &ha->qla4_83xx_reg->leg_int_mask); + if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + mb_int = INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(0, &ha->qla4_83xx_reg->leg_int_mask); + set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags); + } +} - set_bit(AF_INTERRUPTS_ON, &ha->flags); + +void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_enable_mbox_intrs(ha); + qla4_83xx_enable_iocb_intrs(ha); } + void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd, int incount) { diff --git a/drivers/scsi/qla4xxx/ql4_attr.c b/drivers/scsi/qla4xxx/ql4_attr.c index 76819b71ada7..19ee55a6226c 100644 --- a/drivers/scsi/qla4xxx/ql4_attr.c +++ b/drivers/scsi/qla4xxx/ql4_attr.c @@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj, } break; case 2: - /* Reset HBA */ + /* Reset HBA and collect FW dump */ ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE); if (dev_state == QLA8XXX_DEV_READY) { - ql4_printk(KERN_INFO, ha, - "%s: Setting Need reset, reset_owner is 0x%x.\n", - __func__, ha->func_num); + ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n", + __func__); qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE, QLA8XXX_DEV_NEED_RESET); - set_bit(AF_8XXX_RST_OWNER, &ha->flags); + if (is_qla8022(ha) || + (is_qla8032(ha) && + qla4_83xx_can_perform_reset(ha))) { + set_bit(AF_8XXX_RST_OWNER, &ha->flags); + set_bit(AF_FW_RECOVERY, &ha->flags); + ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n", + __func__, ha->func_num); + } } else ql4_printk(KERN_INFO, ha, "%s: Reset not performed as device state is 0x%x\n", diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index 329d553eae94..129f5dd02822 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -136,6 +136,7 @@ #define RESPONSE_QUEUE_DEPTH 64 #define QUEUE_SIZE 64 #define DMA_BUFFER_SIZE 512 +#define IOCB_HIWAT_CUSHION 4 /* * Misc @@ -180,6 +181,7 @@ #define DISABLE_ACB_TOV 30 #define IP_CONFIG_TOV 30 #define LOGIN_TOV 12 +#define BOOT_LOGIN_RESP_TOV 60 #define MAX_RESET_HA_RETRIES 2 #define FW_ALIVE_WAIT_TOV 3 @@ -314,6 +316,7 @@ struct ql4_tuple_ddb { * DDB flags. */ #define DF_RELOGIN 0 /* Relogin to device */ +#define DF_BOOT_TGT 1 /* Boot target entry */ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ #define DF_FO_MASKED 3 @@ -501,6 +504,7 @@ struct scsi_qla_host { #define AF_INTERRUPTS_ON 6 /* 0x00000040 */ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ #define AF_LINK_UP 8 /* 0x00000100 */ +#define AF_LOOPBACK 9 /* 0x00000200 */ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ #define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ #define AF_HA_REMOVAL 12 /* 0x00001000 */ @@ -516,6 +520,8 @@ struct scsi_qla_host { #define AF_8XXX_RST_OWNER 25 /* 0x02000000 */ #define AF_82XX_DUMP_READING 26 /* 0x04000000 */ #define AF_83XX_NO_FW_DUMP 27 /* 0x08000000 */ +#define AF_83XX_IOCB_INTR_ON 28 /* 0x10000000 */ +#define AF_83XX_MBOX_INTR_ON 29 /* 0x20000000 */ unsigned long dpc_flags; @@ -537,6 +543,7 @@ struct scsi_qla_host { uint32_t tot_ddbs; uint16_t iocb_cnt; + uint16_t iocb_hiwat; /* SRB cache. */ #define SRB_MIN_REQ 128 @@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha) static inline int adapter_up(struct scsi_qla_host *ha) { return (test_bit(AF_ONLINE, &ha->flags) != 0) && - (test_bit(AF_LINK_UP, &ha->flags) != 0); + (test_bit(AF_LINK_UP, &ha->flags) != 0) && + (!test_bit(AF_LOOPBACK, &ha->flags)); } static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost) diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1c4795020357..ad9d2e2d370f 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h @@ -495,7 +495,7 @@ struct qla_flt_region { #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E #define MBOX_ASTS_IDC_COMPLETE 0x8100 -#define MBOX_ASTS_IDC_NOTIFY 0x8101 +#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION 0x8101 #define MBOX_ASTS_TXSCVR_INSERTED 0x8130 #define MBOX_ASTS_TXSCVR_REMOVED 0x8131 @@ -522,6 +522,10 @@ struct qla_flt_region { #define FLASH_OPT_COMMIT 2 #define FLASH_OPT_RMW_COMMIT 3 +/* Loopback type */ +#define ENABLE_INTERNAL_LOOPBACK 0x04 +#define ENABLE_EXTERNAL_LOOPBACK 0x08 + /*************************************************************************/ /* Host Adapter Initialization Control Block (from host) */ diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h index 57a5a3cf5770..982293edf02c 100644 --- a/drivers/scsi/qla4xxx/ql4_glbl.h +++ b/drivers/scsi/qla4xxx/ql4_glbl.h @@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha); void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha); int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha); void qla4_8xxx_get_minidump(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha); int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param); int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha); int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha); void qla4_83xx_disable_pause(struct scsi_qla_host *ha); +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha); +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha); extern int ql4xextended_error_logging; extern int ql4xdontresethba; diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c index 1aca1b4f70b8..8fc8548ba4ba 100644 --- a/drivers/scsi/qla4xxx/ql4_init.c +++ b/drivers/scsi/qla4xxx/ql4_init.c @@ -195,12 +195,10 @@ exit_get_sys_info_no_free: * @ha: pointer to host adapter structure. * **/ -static int qla4xxx_init_local_data(struct scsi_qla_host *ha) +static void qla4xxx_init_local_data(struct scsi_qla_host *ha) { /* Initialize aen queue */ ha->aen_q_count = MAX_AEN_ENTRIES; - - return qla4xxx_get_firmware_status(ha); } static uint8_t @@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset) if (ha->isp_ops->start_firmware(ha) == QLA_ERROR) goto exit_init_hba; + /* + * For ISP83XX, mailbox and IOCB interrupts are enabled separately. + * Mailbox interrupts must be enabled prior to issuing any mailbox + * command in order to prevent the possibility of losing interrupts + * while switching from polling to interrupt mode. IOCB interrupts are + * enabled via isp_ops->enable_intrs. + */ + if (is_qla8032(ha)) + qla4_83xx_enable_mbox_intrs(ha); + if (qla4xxx_about_firmware(ha) == QLA_ERROR) goto exit_init_hba; if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR) goto exit_init_hba; - if (qla4xxx_init_local_data(ha) == QLA_ERROR) - goto exit_init_hba; + qla4xxx_init_local_data(ha); status = qla4xxx_init_firmware(ha); if (status == QLA_ERROR) diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index f48f37a281d1..14fec976f634 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c @@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) goto queuing_error; /* total iocbs active */ - if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH) + if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat) goto queuing_error; /* Build command packet */ diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 15ea81465ce4..1b83dc283d2e 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c @@ -582,6 +582,33 @@ exit_prq_error: } /** + * qla4_83xx_loopback_in_progress: Is loopback in progress? + * @ha: Pointer to host adapter structure. + * @ret: 1 = loopback in progress, 0 = loopback not in progress + **/ +static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) || + (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics in progress\n", + __func__)); + rval = 1; + } else { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics not in progress\n", + __func__)); + rval = 0; + } + } + + return rval; +} + +/** * qla4xxx_isr_decode_mailbox - decodes mailbox status * @ha: Pointer to host adapter structure. * @mailbox_status: Mailbox status. @@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, case MBOX_ASTS_LINK_DOWN: clear_bit(AF_LINK_UP, &ha->flags); - if (test_bit(AF_INIT_DONE, &ha->flags)) + if (test_bit(AF_INIT_DONE, &ha->flags)) { set_bit(DPC_LINK_CHANGED, &ha->dpc_flags); + qla4xxx_wake_dpc(ha); + } ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__); qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN, @@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, " removed\n", ha->host_no, mbox_sts[0])); break; - case MBOX_ASTS_IDC_NOTIFY: + case MBOX_ASTS_IDC_REQUEST_NOTIFICATION: { uint32_t opcode; if (is_qla8032(ha)) { @@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, DEBUG2(ql4_printk(KERN_INFO, ha, "scsi:%ld: AEN %04x IDC Complete notification\n", ha->host_no, mbox_sts[0])); + + if (qla4_83xx_loopback_in_progress(ha)) + set_bit(AF_LOOPBACK, &ha->flags); + else + clear_bit(AF_LOOPBACK, &ha->flags); } break; @@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id) /* Legacy interrupt is valid if bit31 of leg_int_ptr is set */ if (!(leg_int_ptr & LEG_INT_PTR_B31)) { - ql4_printk(KERN_ERR, ha, - "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", - __func__); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", + __func__)); return IRQ_NONE; } /* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */ if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) { - ql4_printk(KERN_ERR, ha, - "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", - __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", + __func__, (leg_int_ptr & PF_BITS_MASK), + ha->pf_bit)); return IRQ_NONE; } @@ -1437,11 +1472,14 @@ irq_not_attached: void qla4xxx_free_irqs(struct scsi_qla_host *ha) { - if (test_bit(AF_MSIX_ENABLED, &ha->flags)) - qla4_8xxx_disable_msix(ha); - else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { - free_irq(ha->pdev->irq, ha); - pci_disable_msi(ha->pdev); - } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) - free_irq(ha->pdev->irq, ha); + if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) { + if (test_bit(AF_MSIX_ENABLED, &ha->flags)) { + qla4_8xxx_disable_msix(ha); + } else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + pci_disable_msi(ha->pdev); + } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + } + } } diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 3d41034191f0..81e738d61ec0 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -44,6 +44,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count) } /** + * qla4xxx_is_intr_poll_mode – Are we allowed to poll for interrupts? + * @ha: Pointer to host adapter structure. + * @ret: 1=polling mode, 0=non-polling mode + **/ +static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) + rval = 0; + } else { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_INTERRUPTS_ON, &ha->flags) && + test_bit(AF_ONLINE, &ha->flags) && + !test_bit(AF_HA_REMOVAL, &ha->flags)) + rval = 0; + } + + return rval; +} + +/** * qla4xxx_mailbox_command - issues mailbox commands * @ha: Pointer to host adapter structure. * @inCount: number of mailbox registers to load. @@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount, /* * Wait for completion: Poll or completion queue */ - if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && - test_bit(AF_INTERRUPTS_ON, &ha->flags) && - test_bit(AF_ONLINE, &ha->flags) && - !test_bit(AF_HA_REMOVAL, &ha->flags)) { - /* Do not poll for completion. Use completion queue */ - set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); - clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - } else { + if (qla4xxx_is_intr_poll_mode(ha)) { /* Poll for command to complete */ wait_count = jiffies + MBOX_TOV * HZ; while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) { if (time_after_eq(jiffies, wait_count)) break; - /* * Service the interrupt. * The ISR will save the mailbox status registers * to a temporary storage location in the adapter * structure. */ - spin_lock_irqsave(&ha->hardware_lock, flags); ha->isp_ops->process_mailbox_interrupt(ha, outCount); spin_unlock_irqrestore(&ha->hardware_lock, flags); msleep(10); } + } else { + /* Do not poll for completion. Use completion queue */ + set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); + wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); + clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); } /* Check for mailbox timeout. */ @@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) return QLA_ERROR; } - ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n", - ha->host_no, mbox_sts[2]); + /* High-water mark of IOCBs */ + ha->iocb_hiwat = mbox_sts[2]; + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: firmware IOCBs available = %d\n", __func__, + ha->iocb_hiwat)); + + if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) + ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; + + /* Ideally, we should not enter this code, as the # of firmware + * IOCBs is hard-coded in the firmware. We set a default + * iocb_hiwat here just in case */ + if (ha->iocb_hiwat == 0) { + ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4; + DEBUG2(ql4_printk(KERN_WARNING, ha, + "%s: Setting IOCB's to = %d\n", __func__, + ha->iocb_hiwat)); + } return QLA_SUCCESS; } diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 499a92db1cf6..71d3d234f526 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha) retval = qla4_8xxx_device_state_handler(ha); - if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags)) + if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags)) retval = qla4xxx_request_irqs(ha); return retval; @@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha) } /* Make sure we receive the minimum required data to cache internally */ - if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) { + if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) < + offsetof(struct mbx_sys_info, reserved)) { DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive" " error (%x)\n", ha->host_no, __func__, mbox_sts[4])); goto exit_validate_mac82; - } /* Save M.A.C. address & serial_number */ @@ -3463,7 +3463,7 @@ exit_validate_mac82: /* Interrupt handling helpers. */ -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) return QLA_SUCCESS; } -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) void qla4_82xx_enable_intrs(struct scsi_qla_host *ha) { - qla4_8xxx_mbx_intr_enable(ha); + qla4_8xxx_intr_enable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - reset */ @@ -3522,7 +3522,7 @@ void qla4_82xx_disable_intrs(struct scsi_qla_host *ha) { if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + qla4_8xxx_intr_disable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - set */ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4cec123a6a6a..6142729167f4 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess, sess->password_in, BIDI_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; case ISCSI_PARAM_CHAP_OUT_IDX: rval = qla4xxx_get_chap_index(ha, sess->username, sess->password, LOCAL_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; default: return iscsi_session_get_param(cls_sess, param, buf); @@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) || !test_bit(AF_ONLINE, &ha->flags) || !test_bit(AF_LINK_UP, &ha->flags) || + test_bit(AF_LOOPBACK, &ha->flags) || test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags)) goto qc_host_busy; @@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) if (status == QLA_SUCCESS) { if (!test_bit(AF_FW_RECOVERY, &ha->flags)) qla4xxx_cmd_wait(ha); + ha->isp_ops->disable_intrs(ha); qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); qla4xxx_abort_active_cmds(ha, DID_RESET << 16); @@ -3479,7 +3481,8 @@ dpc_post_reset_ha: } /* ---- link change? --- */ - if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { + if (!test_bit(AF_LOOPBACK, &ha->flags) && + test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { if (!test_bit(AF_LINK_UP, &ha->flags)) { /* ---- link down? --- */ qla4xxx_mark_all_devices_missing(ha); @@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) { qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16); - if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) { - /* Turn-off interrupts on the card. */ - ha->isp_ops->disable_intrs(ha); - } + /* Turn-off interrupts on the card. */ + ha->isp_ops->disable_intrs(ha); if (is_qla40XX(ha)) { writel(set_rmask(CSR_SCSI_PROCESSOR_INTR), @@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) } /* Detach interrupts */ - if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) - qla4xxx_free_irqs(ha); + qla4xxx_free_irqs(ha); /* free extra memory */ qla4xxx_mem_free(ha); @@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, struct iscsi_endpoint *ep; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; - struct sockaddr *dst_addr; + struct sockaddr *t_addr; + struct sockaddr_storage *dst_addr; char *ip; /* TODO: need to destroy on unload iscsi_endpoint*/ @@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, return NULL; if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) { - dst_addr->sa_family = AF_INET6; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET6; addr6 = (struct sockaddr_in6 *)dst_addr; ip = (char *)&addr6->sin6_addr; memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN); addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port)); } else { - dst_addr->sa_family = AF_INET; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET; addr = (struct sockaddr_in *)dst_addr; ip = (char *)&addr->sin_addr; memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN); addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port)); } - ep = qla4xxx_ep_connect(ha->host, dst_addr, 0); + ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); return ep; } @@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx) } static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry) + struct ddb_entry *ddb_entry, + uint16_t idx) { uint16_t def_timeout; @@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, def_timeout : LOGIN_TOV; ddb_entry->default_time2wait = le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait); + + if (ql4xdisablesysfsboot && + (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx)) + set_bit(DF_BOOT_TGT, &ddb_entry->flags); } static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha) @@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha, static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct dev_db_entry *fw_ddb_entry, - int is_reset) + int is_reset, uint16_t idx) { struct iscsi_cls_session *cls_sess; struct iscsi_session *sess; @@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry, sizeof(struct dev_db_entry)); - qla4xxx_setup_flash_ddb_entry(ha, ddb_entry); + qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx); cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id); @@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha, goto continue_next_nt; } - ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset); + ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx); if (ret == QLA_ERROR) goto exit_nt_list; @@ -5116,6 +5124,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset) } /** + * qla4xxx_wait_login_resp_boot_tgt - Wait for iSCSI boot target login + * response. + * @ha: pointer to adapter structure + * + * When the boot entry is normal iSCSI target then DF_BOOT_TGT flag will be + * set in DDB and we will wait for login response of boot targets during + * probe. + **/ +static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha) +{ + struct ddb_entry *ddb_entry; + struct dev_db_entry *fw_ddb_entry = NULL; + dma_addr_t fw_ddb_entry_dma; + unsigned long wtime; + uint32_t ddb_state; + int max_ddbs, idx, ret; + + max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX : + MAX_DEV_DB_ENTRIES; + + fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + &fw_ddb_entry_dma, GFP_KERNEL); + if (!fw_ddb_entry) { + ql4_printk(KERN_ERR, ha, + "%s: Unable to allocate dma buffer\n", __func__); + goto exit_login_resp; + } + + wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV); + + for (idx = 0; idx < max_ddbs; idx++) { + ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx); + if (ddb_entry == NULL) + continue; + + if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: DDB index [%d]\n", __func__, + ddb_entry->fw_ddb_index)); + do { + ret = qla4xxx_get_fwddb_entry(ha, + ddb_entry->fw_ddb_index, + fw_ddb_entry, fw_ddb_entry_dma, + NULL, NULL, &ddb_state, NULL, + NULL, NULL); + if (ret == QLA_ERROR) + goto exit_login_resp; + + if ((ddb_state == DDB_DS_SESSION_ACTIVE) || + (ddb_state == DDB_DS_SESSION_FAILED)) + break; + + schedule_timeout_uninterruptible(HZ); + + } while ((time_after(wtime, jiffies))); + + if (!time_after(wtime, jiffies)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Login response wait timer expired\n", + __func__)); + goto exit_login_resp; + } + } + } + +exit_login_resp: + if (fw_ddb_entry) + dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + fw_ddb_entry, fw_ddb_entry_dma); +} + +/** * qla4xxx_probe_adapter - callback function to probe HBA * @pdev: pointer to pci_dev structure * @pci_device_id: pointer to pci_device entry @@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev, if (is_qla80XX(ha)) { ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, - QLA82XX_CRB_DEV_STATE); + QLA8XXX_CRB_DEV_STATE); ha->isp_ops->idc_unlock(ha); if (dev_state == QLA8XXX_DEV_FAILED) { ql4_printk(KERN_WARNING, ha, "%s: don't retry " @@ -5368,6 +5448,7 @@ skip_retry_init: /* Perform the build ddb list and login to each */ qla4xxx_build_ddb_list(ha, INIT_ADAPTER); iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb); + qla4xxx_wait_login_resp_boot_tgt(ha); qla4xxx_create_chap_list(ha); @@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type) goto exit_host_reset; } - rval = qla4xxx_wait_for_hba_online(ha); - if (rval != QLA_SUCCESS) { - DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host " - "adapter\n", __func__)); - rval = -EIO; - goto exit_host_reset; - } - if (test_bit(DPC_RESET_HA, &ha->dpc_flags)) goto recover_adapter; @@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev) static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) { uint32_t rval = QLA_ERROR; - uint32_t ret = 0; int fn; struct pci_dev *other_pdev = NULL; @@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to " - "reserve interrupt %d already in use.\n", - ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } + ha->isp_ops->enable_intrs(ha); } } else { ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not " @@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) QLA8XXX_DEV_READY)) { clear_bit(AF_FW_RECOVERY, &ha->flags); rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER); - if (rval == QLA_SUCCESS) { - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to" - " reserve interrupt %d already in" - " use.\n", ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } - } + if (rval == QLA_SUCCESS) + ha->isp_ops->enable_intrs(ha); + ha->isp_ops->idc_lock(ha); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index f6df2ea91ab5..6775a45af315 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h @@ -5,4 +5,4 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.03.00-k1" +#define QLA4XXX_DRIVER_VERSION "5.03.00-k4" diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 59d427bf08e2..0a74b975efdf 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); +static ssize_t +show_priv_session_target_id(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + return sprintf(buf, "%d\n", session->target_id); +} +static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, + show_priv_session_target_id, NULL); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ @@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, + &dev_attr_priv_sess_target_id.attr, NULL, }; @@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_id.attr) + return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index be2c9a6561ff..9f0c46547459 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1391,24 +1391,23 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) return ERR_PTR(-ENOMEM); } - if (!idr_pre_get(&sg_index_idr, GFP_KERNEL)) { - printk(KERN_WARNING "idr expansion Sg_device failure\n"); - error = -ENOMEM; - goto out; - } - + idr_preload(GFP_KERNEL); write_lock_irqsave(&sg_index_lock, iflags); - error = idr_get_new(&sg_index_idr, sdp, &k); - if (error) { - write_unlock_irqrestore(&sg_index_lock, iflags); - printk(KERN_WARNING "idr allocation Sg_device failure: %d\n", - error); - goto out; + error = idr_alloc(&sg_index_idr, sdp, 0, SG_MAX_DEVS, GFP_NOWAIT); + if (error < 0) { + if (error == -ENOSPC) { + sdev_printk(KERN_WARNING, scsidp, + "Unable to attach sg device type=%d, minor number exceeds %d\n", + scsidp->type, SG_MAX_DEVS - 1); + error = -ENODEV; + } else { + printk(KERN_WARNING + "idr allocation Sg_device failure: %d\n", error); + } + goto out_unlock; } - - if (unlikely(k >= SG_MAX_DEVS)) - goto overflow; + k = error; SCSI_LOG_TIMEOUT(3, printk("sg_alloc: dev=%d \n", k)); sprintf(disk->disk_name, "sg%d", k); @@ -1420,25 +1419,17 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) sdp->sg_tablesize = queue_max_segments(q); sdp->index = k; kref_init(&sdp->d_ref); + error = 0; +out_unlock: write_unlock_irqrestore(&sg_index_lock, iflags); + idr_preload_end(); - error = 0; - out: if (error) { kfree(sdp); return ERR_PTR(error); } return sdp; - - overflow: - idr_remove(&sg_index_idr, k); - write_unlock_irqrestore(&sg_index_lock, iflags); - sdev_printk(KERN_WARNING, scsidp, - "Unable to attach sg device type=%d, minor " - "number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1); - error = -ENODEV; - goto out; } static int diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 3e2b3717cb5c..86974471af68 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4076,7 +4076,7 @@ static int st_probe(struct device *dev) struct st_modedef *STm; struct st_partstat *STps; struct st_buffer *buffer; - int i, dev_num, error; + int i, error; char *stp; if (SDp->type != TYPE_TAPE) @@ -4178,27 +4178,17 @@ static int st_probe(struct device *dev) tpnt->blksize_changed = 0; mutex_init(&tpnt->lock); - if (!idr_pre_get(&st_index_idr, GFP_KERNEL)) { - pr_warn("st: idr expansion failed\n"); - error = -ENOMEM; - goto out_put_disk; - } - + idr_preload(GFP_KERNEL); spin_lock(&st_index_lock); - error = idr_get_new(&st_index_idr, tpnt, &dev_num); + error = idr_alloc(&st_index_idr, tpnt, 0, ST_MAX_TAPES + 1, GFP_NOWAIT); spin_unlock(&st_index_lock); - if (error) { + idr_preload_end(); + if (error < 0) { pr_warn("st: idr allocation failed: %d\n", error); goto out_put_disk; } - - if (dev_num > ST_MAX_TAPES) { - pr_err("st: Too many tape devices (max. %d).\n", ST_MAX_TAPES); - goto out_put_index; - } - - tpnt->index = dev_num; - sprintf(disk->disk_name, "st%d", dev_num); + tpnt->index = error; + sprintf(disk->disk_name, "st%d", tpnt->index); dev_set_drvdata(dev, tpnt); @@ -4218,9 +4208,8 @@ static int st_probe(struct device *dev) out_remove_devs: remove_cdevs(tpnt); -out_put_index: spin_lock(&st_index_lock); - idr_remove(&st_index_idr, dev_num); + idr_remove(&st_index_idr, tpnt->index); spin_unlock(&st_index_lock); out_put_disk: put_disk(disk); diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 538ebe213129..24456a0de6b2 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -2880,7 +2880,6 @@ static int binder_release(struct inode *nodp, struct file *filp) static void binder_deferred_release(struct binder_proc *proc) { - struct hlist_node *pos; struct binder_transaction *t; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count; @@ -2924,7 +2923,7 @@ static void binder_deferred_release(struct binder_proc *proc) node->local_weak_refs = 0; hlist_add_head(&node->dead_node, &binder_dead_nodes); - hlist_for_each_entry(ref, pos, &node->refs, node_entry) { + hlist_for_each_entry(ref, &node->refs, node_entry) { incoming_refs++; if (ref->death) { death++; @@ -3156,12 +3155,11 @@ static void print_binder_thread(struct seq_file *m, static void print_binder_node(struct seq_file *m, struct binder_node *node) { struct binder_ref *ref; - struct hlist_node *pos; struct binder_work *w; int count; count = 0; - hlist_for_each_entry(ref, pos, &node->refs, node_entry) + hlist_for_each_entry(ref, &node->refs, node_entry) count++; seq_printf(m, " node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d", @@ -3171,7 +3169,7 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node) node->internal_strong_refs, count); if (count) { seq_puts(m, " proc"); - hlist_for_each_entry(ref, pos, &node->refs, node_entry) + hlist_for_each_entry(ref, &node->refs, node_entry) seq_printf(m, " %d", ref->proc->pid); } seq_puts(m, "\n"); @@ -3369,7 +3367,6 @@ static void print_binder_proc_stats(struct seq_file *m, static int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; struct binder_node *node; int do_lock = !binder_debug_no_lock; @@ -3380,10 +3377,10 @@ static int binder_state_show(struct seq_file *m, void *unused) if (!hlist_empty(&binder_dead_nodes)) seq_puts(m, "dead nodes:\n"); - hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node) + hlist_for_each_entry(node, &binder_dead_nodes, dead_node) print_binder_node(m, node); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 1); if (do_lock) binder_unlock(__func__); @@ -3393,7 +3390,6 @@ static int binder_state_show(struct seq_file *m, void *unused) static int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; int do_lock = !binder_debug_no_lock; if (do_lock) @@ -3403,7 +3399,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) print_binder_stats(m, "", &binder_stats); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc_stats(m, proc); if (do_lock) binder_unlock(__func__); @@ -3413,14 +3409,13 @@ static int binder_stats_show(struct seq_file *m, void *unused) static int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; - struct hlist_node *pos; int do_lock = !binder_debug_no_lock; if (do_lock) binder_lock(__func__); seq_puts(m, "binder transactions:\n"); - hlist_for_each_entry(proc, pos, &binder_procs, proc_node) + hlist_for_each_entry(proc, &binder_procs, proc_node) print_binder_proc(m, proc, 0); if (do_lock) binder_unlock(__func__); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 23a98e658306..9435a3d369a7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -144,23 +144,24 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) spin_lock_init(&tiqn->login_stats.lock); spin_lock_init(&tiqn->logout_stats.lock); - if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) { - pr_err("idr_pre_get() for tiqn_idr failed\n"); - kfree(tiqn); - return ERR_PTR(-ENOMEM); - } tiqn->tiqn_state = TIQN_STATE_ACTIVE; + idr_preload(GFP_KERNEL); spin_lock(&tiqn_lock); - ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index); + + ret = idr_alloc(&tiqn_idr, NULL, 0, 0, GFP_NOWAIT); if (ret < 0) { - pr_err("idr_get_new() failed for tiqn->tiqn_index\n"); + pr_err("idr_alloc() failed for tiqn->tiqn_index\n"); spin_unlock(&tiqn_lock); + idr_preload_end(); kfree(tiqn); return ERR_PTR(ret); } + tiqn->tiqn_index = ret; list_add_tail(&tiqn->tiqn_list, &g_tiqn_list); + spin_unlock(&tiqn_lock); + idr_preload_end(); pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index fdb632f0ab85..2535d4d46c0e 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -247,19 +247,16 @@ static int iscsi_login_zero_tsih_s1( spin_lock_init(&sess->session_usage_lock); spin_lock_init(&sess->ttt_lock); - if (!idr_pre_get(&sess_idr, GFP_KERNEL)) { - pr_err("idr_pre_get() for sess_idr failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_NO_RESOURCES); - kfree(sess); - return -ENOMEM; - } + idr_preload(GFP_KERNEL); spin_lock_bh(&sess_idr_lock); - ret = idr_get_new(&sess_idr, NULL, &sess->session_index); + ret = idr_alloc(&sess_idr, NULL, 0, 0, GFP_NOWAIT); + if (ret >= 0) + sess->session_index = ret; spin_unlock_bh(&sess_idr_lock); + idr_preload_end(); if (ret < 0) { - pr_err("idr_get_new() for sess_idr failed\n"); + pr_err("idr_alloc() for sess_idr failed\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); kfree(sess); diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 6659dd36e806..113f33598b9f 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -169,7 +169,6 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) { struct ft_tport *tport; struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; rcu_read_lock(); @@ -178,7 +177,7 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) goto out; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { kref_get(&sess->kref); rcu_read_unlock(); @@ -201,10 +200,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, { struct ft_sess *sess; struct hlist_head *head; - struct hlist_node *pos; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) + hlist_for_each_entry_rcu(sess, head, hash) if (sess->port_id == port_id) return sess; @@ -253,11 +251,10 @@ static void ft_sess_unhash(struct ft_sess *sess) static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id) { struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; head = &tport->hash[ft_sess_hash(port_id)]; - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { ft_sess_unhash(sess); return sess; @@ -273,12 +270,11 @@ static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id) static void ft_sess_delete_all(struct ft_tport *tport) { struct hlist_head *head; - struct hlist_node *pos; struct ft_sess *sess; for (head = tport->hash; head < &tport->hash[FT_SESS_HASH_SIZE]; head++) { - hlist_for_each_entry_rcu(sess, pos, head, hash) { + hlist_for_each_entry_rcu(sess, head, hash) { ft_sess_unhash(sess); transport_deregister_session_configfs(sess->se_sess); ft_sess_put(sess); /* release from table */ diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 836828e29a87..c33fa5315d6b 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -73,21 +73,14 @@ static struct cpufreq_cooling_device *notify_device; */ static int get_idr(struct idr *idr, int *id) { - int err; -again: - if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) - return -ENOMEM; + int ret; mutex_lock(&cooling_cpufreq_lock); - err = idr_get_new(idr, NULL, id); + ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); mutex_unlock(&cooling_cpufreq_lock); - - if (unlikely(err == -EAGAIN)) - goto again; - else if (unlikely(err)) - return err; - - *id = *id & MAX_IDR_MASK; + if (unlikely(ret < 0)) + return ret; + *id = ret; return 0; } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 8c8ce806180f..84e95f32cdb6 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -132,23 +132,16 @@ EXPORT_SYMBOL_GPL(thermal_unregister_governor); static int get_idr(struct idr *idr, struct mutex *lock, int *id) { - int err; - -again: - if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0)) - return -ENOMEM; + int ret; if (lock) mutex_lock(lock); - err = idr_get_new(idr, NULL, id); + ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); if (lock) mutex_unlock(lock); - if (unlikely(err == -EAGAIN)) - goto again; - else if (unlikely(err)) - return err; - - *id = *id & MAX_IDR_MASK; + if (unlikely(ret < 0)) + return ret; + *id = ret; return 0; } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 814655ee2d61..3687f0cad642 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -870,21 +870,20 @@ static struct input_handler sysrq_handler = { static bool sysrq_handler_registered; +unsigned short platform_sysrq_reset_seq[] __weak = { KEY_RESERVED }; + static inline void sysrq_register_handler(void) { - extern unsigned short platform_sysrq_reset_seq[] __weak; unsigned short key; int error; int i; - if (platform_sysrq_reset_seq) { - for (i = 0; i < ARRAY_SIZE(sysrq_reset_seq); i++) { - key = platform_sysrq_reset_seq[i]; - if (key == KEY_RESERVED || key > KEY_MAX) - break; + for (i = 0; i < ARRAY_SIZE(sysrq_reset_seq); i++) { + key = platform_sysrq_reset_seq[i]; + if (key == KEY_RESERVED || key > KEY_MAX) + break; - sysrq_reset_seq[sysrq_reset_seq_len++] = key; - } + sysrq_reset_seq[sysrq_reset_seq_len++] = key; } error = input_register_handler(&sysrq_handler); diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 5110f367f1f1..c8b926291e28 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -369,26 +369,15 @@ static void uio_dev_del_attributes(struct uio_device *idev) static int uio_get_minor(struct uio_device *idev) { int retval = -ENOMEM; - int id; mutex_lock(&minor_lock); - if (idr_pre_get(&uio_idr, GFP_KERNEL) == 0) - goto exit; - - retval = idr_get_new(&uio_idr, idev, &id); - if (retval < 0) { - if (retval == -EAGAIN) - retval = -ENOMEM; - goto exit; - } - if (id < UIO_MAX_DEVICES) { - idev->minor = id; - } else { + retval = idr_alloc(&uio_idr, idev, 0, UIO_MAX_DEVICES, GFP_KERNEL); + if (retval >= 0) { + idev->minor = retval; + } else if (retval == -ENOSPC) { dev_err(idev->dev, "too many uio devices\n"); retval = -EINVAL; - idr_remove(&uio_idr, id); } -exit: mutex_unlock(&minor_lock); return retval; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 28e2d5b2c0c7..fcc12f3e60a3 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -139,23 +139,8 @@ EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); */ static int vfio_alloc_group_minor(struct vfio_group *group) { - int ret, minor; - -again: - if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0)) - return -ENOMEM; - /* index 0 is used by /dev/vfio/vfio */ - ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor); - if (ret == -EAGAIN) - goto again; - if (ret || minor > MINORMASK) { - if (minor > MINORMASK) - idr_remove(&vfio.group_idr, minor); - return -ENOSPC; - } - - return minor; + return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL); } static void vfio_free_group_minor(int minor) diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index be27b551473f..db10d0120d2b 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -384,6 +384,12 @@ config BACKLIGHT_LP855X This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557 backlight driver. +config BACKLIGHT_LP8788 + tristate "Backlight driver for TI LP8788 MFD" + depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788 + help + This supports TI LP8788 backlight driver. + config BACKLIGHT_OT200 tristate "Backlight driver for ot200 visualisation device" depends on BACKLIGHT_CLASS_DEVICE && CS5535_MFGPT && GPIO_CS5535 diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 4606c218e8e4..96c4d620c5ce 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_BACKLIGHT_LM3630) += lm3630_bl.o obj-$(CONFIG_BACKLIGHT_LM3639) += lm3639_bl.o obj-$(CONFIG_BACKLIGHT_LOCOMO) += locomolcd.o obj-$(CONFIG_BACKLIGHT_LP855X) += lp855x_bl.o +obj-$(CONFIG_BACKLIGHT_LP8788) += lp8788_bl.o obj-$(CONFIG_BACKLIGHT_MAX8925) += max8925_bl.o obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_OT200) += ot200_bl.o diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c index d29e49443f29..c02aa2c2575a 100644 --- a/drivers/video/backlight/ams369fg06.c +++ b/drivers/video/backlight/ams369fg06.c @@ -317,10 +317,7 @@ static int ams369fg06_power_on(struct ams369fg06 *lcd) pd = lcd->lcd_pd; bd = lcd->bd; - if (!pd->power_on) { - dev_err(lcd->dev, "power_on is NULL.\n"); - return -EINVAL; - } else { + if (pd->power_on) { pd->power_on(lcd->ld, 1); msleep(pd->power_on_delay); } @@ -370,7 +367,8 @@ static int ams369fg06_power_off(struct ams369fg06 *lcd) msleep(pd->power_off_delay); - pd->power_on(lcd->ld, 0); + if (pd->power_on) + pd->power_on(lcd->ld, 0); return 0; } diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c new file mode 100644 index 000000000000..4bb8b4f140cf --- /dev/null +++ b/drivers/video/backlight/lp8788_bl.c @@ -0,0 +1,333 @@ +/* + * TI LP8788 MFD - backlight driver + * + * Copyright 2012 Texas Instruments + * + * Author: Milo(Woogyom) Kim <milo.kim@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/backlight.h> +#include <linux/err.h> +#include <linux/mfd/lp8788.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pwm.h> +#include <linux/slab.h> + +/* Register address */ +#define LP8788_BL_CONFIG 0x96 +#define LP8788_BL_EN BIT(0) +#define LP8788_BL_PWM_INPUT_EN BIT(5) +#define LP8788_BL_FULLSCALE_SHIFT 2 +#define LP8788_BL_DIM_MODE_SHIFT 1 +#define LP8788_BL_PWM_POLARITY_SHIFT 6 + +#define LP8788_BL_BRIGHTNESS 0x97 + +#define LP8788_BL_RAMP 0x98 +#define LP8788_BL_RAMP_RISE_SHIFT 4 + +#define MAX_BRIGHTNESS 127 +#define DEFAULT_BL_NAME "lcd-backlight" + +struct lp8788_bl_config { + enum lp8788_bl_ctrl_mode bl_mode; + enum lp8788_bl_dim_mode dim_mode; + enum lp8788_bl_full_scale_current full_scale; + enum lp8788_bl_ramp_step rise_time; + enum lp8788_bl_ramp_step fall_time; + enum pwm_polarity pwm_pol; +}; + +struct lp8788_bl { + struct lp8788 *lp; + struct backlight_device *bl_dev; + struct lp8788_backlight_platform_data *pdata; + enum lp8788_bl_ctrl_mode mode; + struct pwm_device *pwm; +}; + +struct lp8788_bl_config default_bl_config = { + .bl_mode = LP8788_BL_REGISTER_ONLY, + .dim_mode = LP8788_DIM_EXPONENTIAL, + .full_scale = LP8788_FULLSCALE_1900uA, + .rise_time = LP8788_RAMP_8192us, + .fall_time = LP8788_RAMP_8192us, + .pwm_pol = PWM_POLARITY_NORMAL, +}; + +static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode) +{ + return (mode == LP8788_BL_COMB_PWM_BASED); +} + +static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode) +{ + return (mode == LP8788_BL_REGISTER_ONLY || + mode == LP8788_BL_COMB_REGISTER_BASED); +} + +static int lp8788_backlight_configure(struct lp8788_bl *bl) +{ + struct lp8788_backlight_platform_data *pdata = bl->pdata; + struct lp8788_bl_config *cfg = &default_bl_config; + int ret; + u8 val; + + /* + * Update chip configuration if platform data exists, + * otherwise use the default settings. + */ + if (pdata) { + cfg->bl_mode = pdata->bl_mode; + cfg->dim_mode = pdata->dim_mode; + cfg->full_scale = pdata->full_scale; + cfg->rise_time = pdata->rise_time; + cfg->fall_time = pdata->fall_time; + cfg->pwm_pol = pdata->pwm_pol; + } + + /* Brightness ramp up/down */ + val = (cfg->rise_time << LP8788_BL_RAMP_RISE_SHIFT) | cfg->fall_time; + ret = lp8788_write_byte(bl->lp, LP8788_BL_RAMP, val); + if (ret) + return ret; + + /* Fullscale current setting */ + val = (cfg->full_scale << LP8788_BL_FULLSCALE_SHIFT) | + (cfg->dim_mode << LP8788_BL_DIM_MODE_SHIFT); + + /* Brightness control mode */ + switch (cfg->bl_mode) { + case LP8788_BL_REGISTER_ONLY: + val |= LP8788_BL_EN; + break; + case LP8788_BL_COMB_PWM_BASED: + case LP8788_BL_COMB_REGISTER_BASED: + val |= LP8788_BL_EN | LP8788_BL_PWM_INPUT_EN | + (cfg->pwm_pol << LP8788_BL_PWM_POLARITY_SHIFT); + break; + default: + dev_err(bl->lp->dev, "invalid mode: %d\n", cfg->bl_mode); + return -EINVAL; + } + + bl->mode = cfg->bl_mode; + + return lp8788_write_byte(bl->lp, LP8788_BL_CONFIG, val); +} + +static void lp8788_pwm_ctrl(struct lp8788_bl *bl, int br, int max_br) +{ + unsigned int period; + unsigned int duty; + struct device *dev; + struct pwm_device *pwm; + + if (!bl->pdata) + return; + + period = bl->pdata->period_ns; + duty = br * period / max_br; + dev = bl->lp->dev; + + /* request PWM device with the consumer name */ + if (!bl->pwm) { + pwm = devm_pwm_get(dev, LP8788_DEV_BACKLIGHT); + if (IS_ERR(pwm)) { + dev_err(dev, "can not get PWM device\n"); + return; + } + + bl->pwm = pwm; + } + + pwm_config(bl->pwm, duty, period); + if (duty) + pwm_enable(bl->pwm); + else + pwm_disable(bl->pwm); +} + +static int lp8788_bl_update_status(struct backlight_device *bl_dev) +{ + struct lp8788_bl *bl = bl_get_data(bl_dev); + enum lp8788_bl_ctrl_mode mode = bl->mode; + + if (bl_dev->props.state & BL_CORE_SUSPENDED) + bl_dev->props.brightness = 0; + + if (is_brightness_ctrl_by_pwm(mode)) { + int brt = bl_dev->props.brightness; + int max = bl_dev->props.max_brightness; + + lp8788_pwm_ctrl(bl, brt, max); + } else if (is_brightness_ctrl_by_register(mode)) { + u8 brt = bl_dev->props.brightness; + + lp8788_write_byte(bl->lp, LP8788_BL_BRIGHTNESS, brt); + } + + return 0; +} + +static int lp8788_bl_get_brightness(struct backlight_device *bl_dev) +{ + return bl_dev->props.brightness; +} + +static const struct backlight_ops lp8788_bl_ops = { + .options = BL_CORE_SUSPENDRESUME, + .update_status = lp8788_bl_update_status, + .get_brightness = lp8788_bl_get_brightness, +}; + +static int lp8788_backlight_register(struct lp8788_bl *bl) +{ + struct backlight_device *bl_dev; + struct backlight_properties props; + struct lp8788_backlight_platform_data *pdata = bl->pdata; + int init_brt; + char *name; + + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = MAX_BRIGHTNESS; + + /* Initial brightness */ + if (pdata) + init_brt = min_t(int, pdata->initial_brightness, + props.max_brightness); + else + init_brt = 0; + + props.brightness = init_brt; + + /* Backlight device name */ + if (!pdata || !pdata->name) + name = DEFAULT_BL_NAME; + else + name = pdata->name; + + bl_dev = backlight_device_register(name, bl->lp->dev, bl, + &lp8788_bl_ops, &props); + if (IS_ERR(bl_dev)) + return PTR_ERR(bl_dev); + + bl->bl_dev = bl_dev; + + return 0; +} + +static void lp8788_backlight_unregister(struct lp8788_bl *bl) +{ + struct backlight_device *bl_dev = bl->bl_dev; + + if (bl_dev) + backlight_device_unregister(bl_dev); +} + +static ssize_t lp8788_get_bl_ctl_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lp8788_bl *bl = dev_get_drvdata(dev); + enum lp8788_bl_ctrl_mode mode = bl->mode; + char *strmode; + + if (is_brightness_ctrl_by_pwm(mode)) + strmode = "PWM based"; + else if (is_brightness_ctrl_by_register(mode)) + strmode = "Register based"; + else + strmode = "Invalid mode"; + + return scnprintf(buf, PAGE_SIZE, "%s\n", strmode); +} + +static DEVICE_ATTR(bl_ctl_mode, S_IRUGO, lp8788_get_bl_ctl_mode, NULL); + +static struct attribute *lp8788_attributes[] = { + &dev_attr_bl_ctl_mode.attr, + NULL, +}; + +static const struct attribute_group lp8788_attr_group = { + .attrs = lp8788_attributes, +}; + +static int lp8788_backlight_probe(struct platform_device *pdev) +{ + struct lp8788 *lp = dev_get_drvdata(pdev->dev.parent); + struct lp8788_bl *bl; + int ret; + + bl = devm_kzalloc(lp->dev, sizeof(struct lp8788_bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + + bl->lp = lp; + if (lp->pdata) + bl->pdata = lp->pdata->bl_pdata; + + platform_set_drvdata(pdev, bl); + + ret = lp8788_backlight_configure(bl); + if (ret) { + dev_err(lp->dev, "backlight config err: %d\n", ret); + goto err_dev; + } + + ret = lp8788_backlight_register(bl); + if (ret) { + dev_err(lp->dev, "register backlight err: %d\n", ret); + goto err_dev; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &lp8788_attr_group); + if (ret) { + dev_err(lp->dev, "register sysfs err: %d\n", ret); + goto err_sysfs; + } + + backlight_update_status(bl->bl_dev); + + return 0; + +err_sysfs: + lp8788_backlight_unregister(bl); +err_dev: + return ret; +} + +static int lp8788_backlight_remove(struct platform_device *pdev) +{ + struct lp8788_bl *bl = platform_get_drvdata(pdev); + struct backlight_device *bl_dev = bl->bl_dev; + + bl_dev->props.brightness = 0; + backlight_update_status(bl_dev); + sysfs_remove_group(&pdev->dev.kobj, &lp8788_attr_group); + lp8788_backlight_unregister(bl); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver lp8788_bl_driver = { + .probe = lp8788_backlight_probe, + .remove = lp8788_backlight_remove, + .driver = { + .name = LP8788_DEV_BACKLIGHT, + .owner = THIS_MODULE, + }, +}; +module_platform_driver(lp8788_bl_driver); + +MODULE_DESCRIPTION("Texas Instruments LP8788 Backlight Driver"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:lp8788-backlight"); diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig index 67526690acbc..762561fbabbf 100644 --- a/drivers/w1/slaves/Kconfig +++ b/drivers/w1/slaves/Kconfig @@ -17,11 +17,16 @@ config W1_SLAVE_SMEM simple 64bit memory rom(ds2401/ds2411/ds1990*) to your wire. config W1_SLAVE_DS2408 - tristate "8-Channel Addressable Switch (IO Expander) 0x29 family support (DS2408)" - help - Say Y here if you want to use a 1-wire + tristate "8-Channel Addressable Switch (IO Expander) 0x29 family support (DS2408)" + help + Say Y here if you want to use a 1-wire + DS2408 8-Channel Addressable Switch device support - DS2408 8-Channel Addressable Switch device support +config W1_SLAVE_DS2413 + tristate "Dual Channel Addressable Switch 0x3a family support (DS2413)" + help + Say Y here if you want to use a 1-wire + DS2413 Dual Channel Addressable Switch device support config W1_SLAVE_DS2423 tristate "Counter 1-wire device (DS2423)" diff --git a/drivers/w1/slaves/Makefile b/drivers/w1/slaves/Makefile index 05188f6aab5a..06529f3157ab 100644 --- a/drivers/w1/slaves/Makefile +++ b/drivers/w1/slaves/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_W1_SLAVE_THERM) += w1_therm.o obj-$(CONFIG_W1_SLAVE_SMEM) += w1_smem.o -obj-$(CONFIG_W1_SLAVE_DS2408) += w1_ds2408.o +obj-$(CONFIG_W1_SLAVE_DS2408) += w1_ds2408.o +obj-$(CONFIG_W1_SLAVE_DS2413) += w1_ds2413.o obj-$(CONFIG_W1_SLAVE_DS2423) += w1_ds2423.o obj-$(CONFIG_W1_SLAVE_DS2431) += w1_ds2431.o obj-$(CONFIG_W1_SLAVE_DS2433) += w1_ds2433.o diff --git a/drivers/w1/slaves/w1_ds2413.c b/drivers/w1/slaves/w1_ds2413.c new file mode 100644 index 000000000000..829786252c6b --- /dev/null +++ b/drivers/w1/slaves/w1_ds2413.c @@ -0,0 +1,177 @@ +/* + * w1_ds2413.c - w1 family 3a (DS2413) driver + * based on w1_ds2408.c by Jean-Francois Dagenais <dagenaisj@sonatest.com> + * + * Copyright (c) 2013 Mariusz Bialonczyk <manio@skyboo.net> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> + +#include "../w1.h" +#include "../w1_int.h" +#include "../w1_family.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mariusz Bialonczyk <manio@skyboo.net>"); +MODULE_DESCRIPTION("w1 family 3a driver for DS2413 2 Pin IO"); + +#define W1_F3A_RETRIES 3 +#define W1_F3A_FUNC_PIO_ACCESS_READ 0xF5 +#define W1_F3A_FUNC_PIO_ACCESS_WRITE 0x5A +#define W1_F3A_SUCCESS_CONFIRM_BYTE 0xAA + +static ssize_t w1_f3a_read_state( + struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + dev_dbg(&sl->dev, + "Reading %s kobj: %p, off: %0#10x, count: %zu, buff addr: %p", + bin_attr->attr.name, kobj, (unsigned int)off, count, buf); + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + mutex_lock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex locked"); + + if (w1_reset_select_slave(sl)) { + mutex_unlock(&sl->master->bus_mutex); + return -EIO; + } + + w1_write_8(sl->master, W1_F3A_FUNC_PIO_ACCESS_READ); + *buf = w1_read_8(sl->master); + + mutex_unlock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex unlocked"); + + /* check for correct complement */ + if ((*buf & 0x0F) != ((~*buf >> 4) & 0x0F)) + return -EIO; + else + return 1; +} + +static ssize_t w1_f3a_write_output( + struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + u8 w1_buf[3]; + unsigned int retries = W1_F3A_RETRIES; + + if (count != 1 || off != 0) + return -EFAULT; + + dev_dbg(&sl->dev, "locking mutex for write_output"); + mutex_lock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex locked"); + + if (w1_reset_select_slave(sl)) + goto error; + + /* according to the DS2413 datasheet the most significant 6 bits + should be set to "1"s, so do it now */ + *buf = *buf | 0xFC; + + while (retries--) { + w1_buf[0] = W1_F3A_FUNC_PIO_ACCESS_WRITE; + w1_buf[1] = *buf; + w1_buf[2] = ~(*buf); + w1_write_block(sl->master, w1_buf, 3); + + if (w1_read_8(sl->master) == W1_F3A_SUCCESS_CONFIRM_BYTE) { + mutex_unlock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex unlocked, retries:%d", retries); + return 1; + } + if (w1_reset_resume_command(sl->master)) + goto error; + } + +error: + mutex_unlock(&sl->master->bus_mutex); + dev_dbg(&sl->dev, "mutex unlocked in error, retries:%d", retries); + return -EIO; +} + +#define NB_SYSFS_BIN_FILES 2 +static struct bin_attribute w1_f3a_sysfs_bin_files[NB_SYSFS_BIN_FILES] = { + { + .attr = { + .name = "state", + .mode = S_IRUGO, + }, + .size = 1, + .read = w1_f3a_read_state, + }, + { + .attr = { + .name = "output", + .mode = S_IRUGO | S_IWUSR | S_IWGRP, + }, + .size = 1, + .write = w1_f3a_write_output, + } +}; + +static int w1_f3a_add_slave(struct w1_slave *sl) +{ + int err = 0; + int i; + + for (i = 0; i < NB_SYSFS_BIN_FILES && !err; ++i) + err = sysfs_create_bin_file( + &sl->dev.kobj, + &(w1_f3a_sysfs_bin_files[i])); + if (err) + while (--i >= 0) + sysfs_remove_bin_file(&sl->dev.kobj, + &(w1_f3a_sysfs_bin_files[i])); + return err; +} + +static void w1_f3a_remove_slave(struct w1_slave *sl) +{ + int i; + for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i) + sysfs_remove_bin_file(&sl->dev.kobj, + &(w1_f3a_sysfs_bin_files[i])); +} + +static struct w1_family_ops w1_f3a_fops = { + .add_slave = w1_f3a_add_slave, + .remove_slave = w1_f3a_remove_slave, +}; + +static struct w1_family w1_family_3a = { + .fid = W1_FAMILY_DS2413, + .fops = &w1_f3a_fops, +}; + +static int __init w1_f3a_init(void) +{ + return w1_register_family(&w1_family_3a); +} + +static void __exit w1_f3a_exit(void) +{ + w1_unregister_family(&w1_family_3a); +} + +module_init(w1_f3a_init); +module_exit(w1_f3a_exit); diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h index a1f0ce151d53..625dd08f775f 100644 --- a/drivers/w1/w1_family.h +++ b/drivers/w1/w1_family.h @@ -39,6 +39,7 @@ #define W1_EEPROM_DS2431 0x2D #define W1_FAMILY_DS2760 0x30 #define W1_FAMILY_DS2780 0x32 +#define W1_FAMILY_DS2413 0x3A #define W1_THERM_DS1825 0x3B #define W1_FAMILY_DS2781 0x3D #define W1_THERM_DS28EA00 0x42 diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index eb82ee53ee0b..d9a43674cb94 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -125,9 +125,8 @@ static void affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; - struct hlist_node *p; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; @@ -591,11 +591,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct mm_struct *mm = current->mm; struct kioctx *ctx, *ret = NULL; - struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { + hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) { /* * RCU protects us against accessing freed memory but * we have to be careful not to get a reference when the @@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error) else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; + trace_block_bio_complete(bio, error); + if (bio->bi_end_io) bio->bi_end_io(bio, error); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 53f5fae5cfbe..aea605c98ba6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1033,7 +1033,9 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - bdev->bd_inode->i_size = size; + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, size); + mutex_unlock(&bdev->bd_inode->i_mutex); while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; @@ -1118,7 +1120,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret && !bdev->bd_openers) { + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); if (bdi == NULL) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1e59ed575cc9..cf54bdfee334 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root, return 0; } -static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, - unsigned long nr_pages, - enum wb_reason reason) -{ - if (!writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb_nr(sb, nr_pages, reason); - up_read(&sb->s_umount); - return 1; - } - - return 0; -} - /* * shrink metadata reservation for delalloc */ @@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; - writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, - nr_pages, - WB_REASON_FS_FREE_SPACE); + try_to_writeback_inodes_sb_nr(root->fs_info->sb, + nr_pages, + WB_REASON_FS_FREE_SPACE); /* * We need to wait for the async pages to actually start before diff --git a/fs/buffer.c b/fs/buffer.c index 8e18281b4077..b4dcb34c9635 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,6 +41,7 @@ #include <linux/bitops.h> #include <linux/mpage.h> #include <linux/bit_spinlock.h> +#include <trace/events/block.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) } EXPORT_SYMBOL(init_buffer); +inline void touch_buffer(struct buffer_head *bh) +{ + trace_block_touch_buffer(bh); + mark_page_accessed(bh->b_page); +} +EXPORT_SYMBOL(touch_buffer); + static int sleep_on_buffer(void *word) { io_schedule(); @@ -1113,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); + trace_block_dirty_buffer(bh); + /* * Very *carefully* optimize the it-is-already-dirty case. * diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d4f81edd9a5d..a60ea977af6f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page) static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - int rc, bytes; + int rc = req->r_result; + int bytes = le32_to_cpu(msg->hdr.data_len); int i; - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - rc = le32_to_cpu(replyhead->result); - bytes = le32_to_cpu(msg->hdr.data_len); - dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); /* unlock all pages, zeroing any data we didn't read */ @@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 1, 0); + NULL, false, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) &ci->i_layout, snapc, page_off, len, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, - &page, 1, 0, 0, true); + &inode->i_mtime, &page, 1); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); @@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; struct ceph_inode_info *ci = ceph_inode(inode); unsigned wrote; struct page *page; int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - __s32 rc = -EIO; - u64 bytes = 0; + int rc = req->r_result; + u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - if (rc >= 0) { /* * Assume we wrote the pages we originally sent. The @@ -741,8 +725,6 @@ retry: struct page *page; int want; u64 offset, len; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_op *op; long writeback_stat; next = 0; @@ -838,7 +820,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1, 0); + &inode->i_mtime, true, 0); if (IS_ERR(req)) { rc = PTR_ERR(req); @@ -906,10 +888,8 @@ get_more_pages: /* revise final length, page count */ req->r_num_pages = locked_pages; - reqhead = req->r_request->front.iov_base; - op = (void *)(reqhead + 1); - op->extent.length = cpu_to_le64(len); - op->payload_len = cpu_to_le32(len); + req->r_request_ops[0].extent.length = cpu_to_le64(len); + req->r_request_ops[0].payload_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ae2be696eb5b..78e2f575247d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -611,8 +611,16 @@ retry: if (flags & CEPH_CAP_FLAG_AUTH) ci->i_auth_cap = cap; - else if (ci->i_auth_cap == cap) + else if (ci->i_auth_cap == cap) { ci->i_auth_cap = NULL; + spin_lock(&mdsc->cap_dirty_lock); + if (!list_empty(&ci->i_dirty_item)) { + dout(" moving %p to cap_dirty_migrating\n", inode); + list_move(&ci->i_dirty_item, + &mdsc->cap_dirty_migrating); + } + spin_unlock(&mdsc->cap_dirty_lock); + } dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", inode, ceph_vinop(inode), cap, ceph_cap_string(issued), @@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; - int file_wanted, used; + int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list @@ -1563,9 +1571,14 @@ retry_locked: /* NOTE: no side-effects allowed, until we take s_mutex */ + cap_used = used; + if (ci->i_auth_cap && cap != ci->i_auth_cap) + cap_used &= ~ci->i_auth_cap->issued; + revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap_used), ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); @@ -1593,7 +1606,7 @@ retry_locked: } /* completed revocation? going down and there are no caps? */ - if (revoking && (revoking & used) == 0) { + if (revoking && (revoking & cap_used) == 0) { dout("completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; @@ -1670,8 +1683,8 @@ ack: sent++; /* __send_cap drops i_ceph_lock */ - delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, - retain, flushing, NULL); + delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, + want, retain, flushing, NULL); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -2417,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, dout("mds wanted %s -> %s\n", ceph_cap_string(le32_to_cpu(grant->wanted)), ceph_cap_string(wanted)); - grant->wanted = cpu_to_le32(wanted); + /* imported cap may not have correct mds_wanted */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) + check_caps = 1; } cap->seq = seq; @@ -2821,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); + if (op == CEPH_CAP_OP_IMPORT) + ceph_add_cap_releases(mdsc, session); + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 11b57c2c8f15..bf338d9b67e3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); + if (err) + goto out_err; + err = ceph_handle_snapdir(req, dentry, err); if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); @@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = finish_no_open(file, dn); } else { dout("atomic_open finish_open on dn %p\n", dn); + if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { + *opened |= FILE_CREATED; + } err = finish_open(file, dentry, ceph_open, opened); } @@ -535,7 +541,7 @@ more: ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2, page_align); + &mtime, false, page_align); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index f5ed767806df..4a989345b37b 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; u64 len = 1, olen; u64 tmp; - struct ceph_object_layout ol; struct ceph_pg pgid; int r; @@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->map_sem); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); if (r < 0) @@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, + ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid = ol.ol_pgid; dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7a3dfe0a9a80..442880d099c9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -233,6 +233,30 @@ bad: } /* + * parse create results + */ +static int parse_reply_info_create(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + int features) +{ + if (features & CEPH_FEATURE_REPLY_CREATE_INODE) { + if (*p == end) { + info->has_create_ino = false; + } else { + info->has_create_ino = true; + info->ino = ceph_decode_64(p); + } + } + + if (unlikely(*p != end)) + goto bad; + return 0; + +bad: + return -EIO; +} + +/* * parse extra results */ static int parse_reply_info_extra(void **p, void *end, @@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end, { if (info->head->op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else + else if (info->head->op == CEPH_MDS_OP_READDIR) return parse_reply_info_dir(p, end, info, features); + else if (info->head->op == CEPH_MDS_OP_CREATE) + return parse_reply_info_create(p, end, info, features); + else + return -EIO; } /* @@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&req->r_fill_mutex); err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { - if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && + if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || + req->r_op == CEPH_MDS_OP_LSSNAP) && rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ff4188bf6199..c2a19fbbe517 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed { struct ceph_mds_reply_info_in *dir_in; u8 dir_complete, dir_end; }; + + /* for create results */ + struct { + bool has_create_ino; + u64 ino; + }; }; /* encoded blob describing snapshot contexts for certain diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 73b7d44e8a35..0d3c9240c61b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) return ERR_PTR(-ENOMEM); ceph_decode_16_safe(p, end, version, bad); + if (version > 3) { + pr_warning("got mdsmap version %d > 3, failing", version); + goto bad; + } ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); m->m_epoch = ceph_decode_32(p); @@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); m->m_num_data_pg_pools = n; - m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); + m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); if (!m->m_data_pg_pools) goto badmem; - ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); + ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); for (i = 0; i < n; i++) - m->m_data_pg_pools[i] = ceph_decode_32(p); - m->m_cas_pg_pool = ceph_decode_32(p); + m->m_data_pg_pools[i] = ceph_decode_64(p); + m->m_cas_pg_pool = ceph_decode_64(p); /* ok, we don't care about the rest. */ dout("mdsmap_decode success epoch %u\n", m->m_epoch); diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index cd5097d7c804..89fa4a940a0f 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s) case CEPH_MDS_STATE_BOOT: return "up:boot"; case CEPH_MDS_STATE_STANDBY: return "up:standby"; case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; + case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay"; case CEPH_MDS_STATE_CREATING: return "up:creating"; case CEPH_MDS_STATE_STARTING: return "up:starting"; /* up and in */ @@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_LOOKUP: return "lookup"; case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; + case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; case CEPH_MDS_OP_GETATTR: return "getattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; case CEPH_MDS_OP_RMXATTR: return "rmxattr"; + case CEPH_MDS_OP_SETLAYOUT: return "setlayou"; + case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout"; case CEPH_MDS_OP_READDIR: return "readdir"; case CEPH_MDS_OP_MKNOD: return "mknod"; case CEPH_MDS_OP_LINK: return "link"; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e86aa9948124..9fe17c6c2876 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); @@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f053bbd1886f..c7b309723dcc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ @@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); /* file.c */ extern const struct file_operations ceph_file_fops; extern const struct address_space_operations ceph_aops; -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len); -extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); + extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2c2ae5be9902..9b6b2b6dd164 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -29,9 +29,94 @@ struct ceph_vxattr { size_t name_size; /* strlen(name) + 1 (for '\0') */ size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, size_t size); - bool readonly; + bool readonly, hidden; + bool (*exists_cb)(struct ceph_inode_info *ci); }; +/* layouts */ + +static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) +{ + size_t s; + char *p = (char *)&ci->i_layout; + + for (s = 0; s < sizeof(ci->i_layout); s++, p++) + if (*p) + return true; + return false; +} + +static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + pool_name); + else + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + (unsigned long long)pool); + + up_read(&osdc->map_sem); + return ret; +} + +static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, + char *val, size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, "%s", pool_name); + else + ret = snprintf(val, size, "%lld", (unsigned long long)pool); + up_read(&osdc->map_sem); + return ret; +} + /* directories */ static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, @@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } -#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name -#define XATTR_NAME_CEPH(_type, _name) \ - { \ - .name = CEPH_XATTR_NAME(_type, _name), \ - .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ - .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ - .readonly = true, \ - } +#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name +#define CEPH_XATTR_NAME2(_type, _name, _name2) \ + XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 + +#define XATTR_NAME_CEPH(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = true, \ + .hidden = false, \ + .exists_cb = NULL, \ + } +#define XATTR_LAYOUT_FIELD(_type, _name, _field) \ + { \ + .name = CEPH_XATTR_NAME2(_type, _name, _field), \ + .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ + .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_layout_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { + { + .name = "ceph.dir.layout", + .name_size = sizeof("ceph.dir.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, + }, + XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), + XATTR_LAYOUT_FIELD(dir, layout, stripe_count), + XATTR_LAYOUT_FIELD(dir, layout, object_size), + XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_NAME_CEPH(dir, entries), XATTR_NAME_CEPH(dir, files), XATTR_NAME_CEPH(dir, subdirs), @@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), - { 0 } /* Required table terminator */ + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ /* files */ -static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, - size_t size) -{ - int ret; - - ret = snprintf(val, size, - "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n", - (unsigned long long)ceph_file_layout_su(ci->i_layout), - (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), - (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); - return ret; -} - static struct ceph_vxattr ceph_file_vxattrs[] = { - XATTR_NAME_CEPH(file, layout), - /* The following extended attribute name is deprecated */ { - .name = XATTR_CEPH_PREFIX "layout", - .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), - .getxattr_cb = ceph_vxattrcb_file_layout, - .readonly = true, + .name = "ceph.file.layout", + .name_size = sizeof("ceph.file.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, }, - { 0 } /* Required table terminator */ + XATTR_LAYOUT_FIELD(file, layout, stripe_unit), + XATTR_LAYOUT_FIELD(file, layout, stripe_count), + XATTR_LAYOUT_FIELD(file, layout, object_size), + XATTR_LAYOUT_FIELD(file, layout, pool), + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ @@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) size_t size = 0; for (vxattr = vxattrs; vxattr->name; vxattr++) - size += vxattr->name_size; + if (!vxattr->hidden) + size += vxattr->name_size; return size; } @@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, if (!ceph_is_valid_xattr(name)) return -ENODATA; - /* let's see if a virtual xattr was requested */ - vxattr = ceph_match_vxattr(inode, name); - spin_lock(&ci->i_ceph_lock); dout("getxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); + /* let's see if a virtual xattr was requested */ + vxattr = ceph_match_vxattr(inode, name); + if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { + err = vxattr->getxattr_cb(ci, value, size); + goto out; + } + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto get_xattr; @@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, spin_lock(&ci->i_ceph_lock); - if (vxattr && vxattr->readonly) { - err = vxattr->getxattr_cb(ci, value, size); - goto out; - } - err = __build_xattrs(inode); if (err < 0) goto out; @@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, get_xattr: err = -ENODATA; /* == ENOATTR */ xattr = __get_xattr(ci, name); - if (!xattr) { - if (vxattr) - err = vxattr->getxattr_cb(ci, value, size); + if (!xattr) goto out; - } err = -ERANGE; if (size && size < xattr->val_len) @@ -664,23 +763,30 @@ list_xattr: vir_namelen = ceph_vxattrs_name_size(vxattrs); /* adding 1 byte per each variable due to the null termination */ - namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; + namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; err = -ERANGE; - if (size && namelen > size) + if (size && vir_namelen + namelen > size) goto out; - err = namelen; + err = namelen + vir_namelen; if (size == 0) goto out; names = __copy_xattr_names(ci, names); /* virtual xattr names, too */ - if (vxattrs) + err = namelen; + if (vxattrs) { for (i = 0; vxattrs[i].name; i++) { - len = sprintf(names, "%s", vxattrs[i].name); - names += len + 1; + if (!vxattrs[i].hidden && + !(vxattrs[i].exists_cb && + !vxattrs[i].exists_cb(ci))) { + len = sprintf(names, "%s", vxattrs[i].name); + names += len + 1; + err += len + 1; + } } + } out: spin_unlock(&ci->i_ceph_lock); @@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name, if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + /* preallocate memory for xattr name, value, index node */ err = -ENOMEM; newname = kmemdup(name, name_len + 1, GFP_NOFS); @@ -838,6 +948,7 @@ retry: do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_sync_setxattr(dentry, name, value, size, flags); out: kfree(newname); @@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name) if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: @@ -931,6 +1046,7 @@ retry: return err; do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_send_removexattr(dentry, name); out: return err; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d2a833999bcc..83f2606c76d0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -816,10 +816,9 @@ static bool inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - struct hlist_node *p; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/coredump.c b/fs/coredump.c index 69baf903d3bd..c6479658d487 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ diff --git a/fs/dcache.c b/fs/dcache.c index 68220dd0c135..fbfae008ba44 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent); static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; - struct hlist_node *p; again: discon_alias = NULL; - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias); void d_prune_aliases(struct inode *inode) { struct dentry *dentry; - struct hlist_node *p; restart: spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -1443,14 +1441,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; - struct hlist_node *p; if (!inode) { __d_instantiate(entry, NULL); return NULL; } - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index f7501651762d..1b1146670c4b 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1183,7 +1183,7 @@ static void detach_lkb(struct dlm_lkb *lkb) static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) { struct dlm_lkb *lkb; - int rv, id; + int rv; lkb = dlm_allocate_lkb(ls); if (!lkb) @@ -1199,19 +1199,13 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) mutex_init(&lkb->lkb_cb_mutex); INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); - retry: - rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS); - if (!rv) - return -ENOMEM; - + idr_preload(GFP_NOFS); spin_lock(&ls->ls_lkbidr_spin); - rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); - if (!rv) - lkb->lkb_id = id; + rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT); + if (rv >= 0) + lkb->lkb_id = rv; spin_unlock(&ls->ls_lkbidr_spin); - - if (rv == -EAGAIN) - goto retry; + idr_preload_end(); if (rv < 0) { log_error(ls, "create_lkb idr error %d", rv); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 2e99fb0c9737..3ca79d3253b9 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -796,7 +796,6 @@ static int release_lockspace(struct dlm_ls *ls, int force) */ idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); - idr_remove_all(&ls->ls_lkbidr); idr_destroy(&ls->ls_lkbidr); /* diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index dd87a31bcc21..4f5ad246582f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid) static struct connection *__find_con(int nodeid) { int r; - struct hlist_node *h; struct connection *con; r = nodeid_hash(nodeid); - hlist_for_each_entry(con, h, &connection_hash[r], list) { + hlist_for_each_entry(con, &connection_hash[r], list) { if (con->nodeid == nodeid) return con; } @@ -232,13 +231,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) static void foreach_conn(void (*conn_func)(struct connection *c)) { int i; - struct hlist_node *h, *n; + struct hlist_node *n; struct connection *con; for (i = 0; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ + hlist_for_each_entry_safe(con, n, &connection_hash[i], list) conn_func(con); - } } } @@ -257,13 +255,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) static struct connection *assoc2con(int assoc_id) { int i; - struct hlist_node *h; struct connection *con; mutex_lock(&connections_lock); for (i = 0 ; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry(con, h, &connection_hash[i], list) { + hlist_for_each_entry(con, &connection_hash[i], list) { if (con->sctp_assoc == assoc_id) { mutex_unlock(&connections_lock); return con; diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index aedea28a86a1..a6bc63f6e31b 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -305,27 +305,26 @@ static int recover_idr_empty(struct dlm_ls *ls) static int recover_idr_add(struct dlm_rsb *r) { struct dlm_ls *ls = r->res_ls; - int rv, id; - - rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS); - if (!rv) - return -ENOMEM; + int rv; + idr_preload(GFP_NOFS); spin_lock(&ls->ls_recover_idr_lock); if (r->res_id) { - spin_unlock(&ls->ls_recover_idr_lock); - return -1; - } - rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id); - if (rv) { - spin_unlock(&ls->ls_recover_idr_lock); - return rv; + rv = -1; + goto out_unlock; } - r->res_id = id; + rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT); + if (rv < 0) + goto out_unlock; + + r->res_id = rv; ls->ls_recover_list_count++; dlm_hold_rsb(r); + rv = 0; +out_unlock: spin_unlock(&ls->ls_recover_idr_lock); - return 0; + idr_preload_end(); + return rv; } static void recover_idr_del(struct dlm_rsb *r) @@ -351,24 +350,21 @@ static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id) return r; } -static int recover_idr_clear_rsb(int id, void *p, void *data) +static void recover_idr_clear(struct dlm_ls *ls) { - struct dlm_ls *ls = data; - struct dlm_rsb *r = p; + struct dlm_rsb *r; + int id; - r->res_id = 0; - r->res_recover_locks_count = 0; - ls->ls_recover_list_count--; + spin_lock(&ls->ls_recover_idr_lock); - dlm_put_rsb(r); - return 0; -} + idr_for_each_entry(&ls->ls_recover_idr, r, id) { + idr_remove(&ls->ls_recover_idr, id); + r->res_id = 0; + r->res_recover_locks_count = 0; + ls->ls_recover_list_count--; -static void recover_idr_clear(struct dlm_ls *ls) -{ - spin_lock(&ls->ls_recover_idr_lock); - idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls); - idr_remove_all(&ls->ls_recover_idr); + dlm_put_rsb(r); + } if (ls->ls_recover_list_count != 0) { log_error(ls, "warning: recover_list_count %d", diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 5fa2471796c2..8d7a577ae497 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) */ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon) { - struct hlist_node *elem; int rc; - hlist_for_each_entry(*daemon, elem, + hlist_for_each_entry(*daemon, &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], euid_chain) { if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) { @@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void) mutex_unlock(&ecryptfs_msg_ctx_lists_mux); } if (ecryptfs_daemon_hash) { - struct hlist_node *elem; struct ecryptfs_daemon *daemon; int i; @@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void) for (i = 0; i < (1 << ecryptfs_hash_bits); i++) { int rc; - hlist_for_each_entry(daemon, elem, + hlist_for_each_entry(daemon, &ecryptfs_daemon_hash[i], euid_chain) { rc = ecryptfs_exorcise_daemon(daemon); diff --git a/fs/exec.c b/fs/exec.c index 864c50df660a..a96a4885bbbf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) - set_dumpable(current->mm, SUID_DUMPABLE_ENABLED); + set_dumpable(current->mm, SUID_DUMP_USER); else set_dumpable(current->mm, suid_dumpable); @@ -1639,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt); void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case SUID_DUMPABLE_DISABLED: + case SUID_DUMP_DISABLE: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_ENABLED: + case SUID_DUMP_USER: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case SUID_DUMPABLE_SAFE: + case SUID_DUMP_ROOT: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -1662,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; + return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } int get_dumpable(struct mm_struct *mm) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 5df4bb4aab14..262fc9940982 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result, { struct dentry *dentry, *toput = NULL; struct inode *inode; - struct hlist_node *p; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index cf83e77b16cb..f190dfe969da 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -20,10 +20,13 @@ #define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif -#define EXTENT_STATUS_WRITTEN 0x80000000 /* written extent */ -#define EXTENT_STATUS_UNWRITTEN 0x40000000 /* unwritten extent */ -#define EXTENT_STATUS_DELAYED 0x20000000 /* delayed extent */ -#define EXTENT_STATUS_HOLE 0x10000000 /* hole */ +/* + * These flags live in the high bits of extent_status.es_pblk + */ +#define EXTENT_STATUS_WRITTEN (1ULL << 63) +#define EXTENT_STATUS_UNWRITTEN (1ULL << 62) +#define EXTENT_STATUS_DELAYED (1ULL << 61) +#define EXTENT_STATUS_HOLE (1ULL << 60) #define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ EXTENT_STATUS_UNWRITTEN | \ @@ -58,22 +61,22 @@ extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, static inline int ext4_es_is_written(struct extent_status *es) { - return (es->es_pblk & EXTENT_STATUS_WRITTEN); + return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0; } static inline int ext4_es_is_unwritten(struct extent_status *es) { - return (es->es_pblk & EXTENT_STATUS_UNWRITTEN); + return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0; } static inline int ext4_es_is_delayed(struct extent_status *es) { - return (es->es_pblk & EXTENT_STATUS_DELAYED); + return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0; } static inline int ext4_es_is_hole(struct extent_status *es) { - return (es->es_pblk & EXTENT_STATUS_HOLE); + return (es->es_pblk & EXTENT_STATUS_HOLE) != 0; } static inline ext4_fsblk_t ext4_es_status(struct extent_status *es) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c4f4b1c97f8..9ea0cde3fa9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb) /* * Start pushing delalloc when 1/2 of free blocks are dirty. */ - if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && - !writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); - up_read(&sb->s_umount); - } + if (dirty_blocks && (free_blocks < 2 * dirty_blocks)) + try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 12701a567752..e9cc3f0d58e2 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -95,6 +95,8 @@ struct msdos_sb_info { spinlock_t dir_hash_lock; struct hlist_head dir_hashtable[FAT_HASH_SIZE]; + + unsigned int dirty; /* fs state before mount */ }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index f8f491677a4a..acf6e479b443 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -341,12 +341,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); - struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; spin_lock(&sbi->inode_hash_lock); - hlist_for_each_entry(i, _p, head, i_fat_hash) { + hlist_for_each_entry(i, head, i_fat_hash) { BUG_ON(i->vfs_inode.i_sb != sb); if (i->i_pos != i_pos) continue; @@ -488,10 +487,59 @@ static void fat_evict_inode(struct inode *inode) fat_detach(inode); } +static void fat_set_state(struct super_block *sb, + unsigned int set, unsigned int force) +{ + struct buffer_head *bh; + struct fat_boot_sector *b; + struct msdos_sb_info *sbi = sb->s_fs_info; + + /* do not change any thing if mounted read only */ + if ((sb->s_flags & MS_RDONLY) && !force) + return; + + /* do not change state if fs was dirty */ + if (sbi->dirty) { + /* warn only on set (mount). */ + if (set) + fat_msg(sb, KERN_WARNING, "Volume was not properly " + "unmounted. Some data may be corrupt. " + "Please run fsck."); + return; + } + + bh = sb_bread(sb, 0); + if (bh == NULL) { + fat_msg(sb, KERN_ERR, "unable to read boot sector " + "to mark fs as dirty"); + return; + } + + b = (struct fat_boot_sector *) bh->b_data; + + if (sbi->fat_bits == 32) { + if (set) + b->fat32.state |= FAT_STATE_DIRTY; + else + b->fat32.state &= ~FAT_STATE_DIRTY; + } else /* fat 16 and 12 */ { + if (set) + b->fat16.state |= FAT_STATE_DIRTY; + else + b->fat16.state &= ~FAT_STATE_DIRTY; + } + + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); + brelse(bh); +} + static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + fat_set_state(sb, 0, 0); + iput(sbi->fsinfo_inode); iput(sbi->fat_inode); @@ -566,8 +614,18 @@ static void __exit fat_destroy_inodecache(void) static int fat_remount(struct super_block *sb, int *flags, char *data) { + int new_rdonly; struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + + /* make sure we update state on remount. */ + new_rdonly = *flags & MS_RDONLY; + if (new_rdonly != (sb->s_flags & MS_RDONLY)) { + if (new_rdonly) + fat_set_state(sb, 0, 0); + else + fat_set_state(sb, 1, 1); + } return 0; } @@ -1298,17 +1356,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->prev_free = FAT_START_ENT; sb->s_maxbytes = 0xffffffff; - if (!sbi->fat_length && b->fat32_length) { + if (!sbi->fat_length && b->fat32.length) { struct fat_boot_fsinfo *fsinfo; struct buffer_head *fsinfo_bh; /* Must be FAT32 */ sbi->fat_bits = 32; - sbi->fat_length = le32_to_cpu(b->fat32_length); - sbi->root_cluster = le32_to_cpu(b->root_cluster); + sbi->fat_length = le32_to_cpu(b->fat32.length); + sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster); /* MC - if info_sector is 0, don't multiply by 0 */ - sbi->fsinfo_sector = le16_to_cpu(b->info_sector); + sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector); if (sbi->fsinfo_sector == 0) sbi->fsinfo_sector = 1; @@ -1362,6 +1420,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (sbi->fat_bits != 32) sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; + /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */ + if (sbi->fat_bits == 32) + sbi->dirty = b->fat32.state & FAT_STATE_DIRTY; + else /* fat 16 or 12 */ + sbi->dirty = b->fat16.state & FAT_STATE_DIRTY; + /* check that FAT table does not overflow */ fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); @@ -1456,6 +1520,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, "the device does not support discard"); } + fat_set_state(sb, 1, 0); return 0; out_invalid: diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index ef4b5faba87b..499c10438ca2 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart) { struct msdos_sb_info *sbi = MSDOS_SB(sb); struct hlist_head *head; - struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; head = sbi->dir_hashtable + fat_dir_hash(i_logstart); spin_lock(&sbi->dir_hash_lock); - hlist_for_each_entry(i, _p, head, i_dir_hash) { + hlist_for_each_entry(i, head, i_dir_hash) { BUG_ON(i->vfs_inode.i_sb != sb); if (i->i_logstart != i_logstart) continue; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 310972b72a66..21f46fb3a101 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -318,8 +318,14 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) static int write_inode(struct inode *inode, struct writeback_control *wbc) { - if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, wbc); + int ret; + + if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { + trace_writeback_write_inode_start(inode, wbc); + ret = inode->i_sb->s_op->write_inode(inode, wbc); + trace_writeback_write_inode(inode, wbc); + return ret; + } return 0; } @@ -450,6 +456,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) WARN_ON(!(inode->i_state & I_SYNC)); + trace_writeback_single_inode_start(inode, wbc, nr_to_write); + ret = do_writepages(mapping, wbc); /* @@ -1150,8 +1158,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) * dirty the inode itself */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + trace_writeback_dirty_inode_start(inode, flags); + if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags); + + trace_writeback_dirty_inode(inode, flags); } /* @@ -1332,47 +1344,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) EXPORT_SYMBOL(writeback_inodes_sb); /** - * writeback_inodes_sb_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb_nr - try to start writeback if none underway * @sb: the superblock - * @reason: reason why some writeback work was initiated + * @nr: the number of pages to write + * @reason: the reason of writeback * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Invoke writeback_inodes_sb_nr if no writeback is currently underway. * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) +int try_to_writeback_inodes_sb_nr(struct super_block *sb, + unsigned long nr, + enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb(sb, reason); - up_read(&sb->s_umount); + if (writeback_in_progress(sb->s_bdi)) return 1; - } else + + if (!down_read_trylock(&sb->s_umount)) return 0; + + writeback_inodes_sb_nr(sb, nr, reason); + up_read(&sb->s_umount); + return 1; } -EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr); /** - * writeback_inodes_sb_nr_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock - * @nr: the number of pages to write * @reason: reason why some writeback work was initiated * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Implement by try_to_writeback_inodes_sb_nr() * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_nr_if_idle(struct super_block *sb, - unsigned long nr, - enum wb_reason reason) +int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb_nr(sb, nr, reason); - up_read(&sb->s_umount); - return 1; - } else - return 0; + return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } -EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 8dcb114758e3..e2cba1f60c21 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache, struct fscache_cookie *cookie) { struct fscache_object *object; - struct hlist_node *_n; int ret; _enter("%p,%p{%s}", cache, cookie, cookie->def->name); spin_lock(&cookie->lock); - hlist_for_each_entry(object, _n, &cookie->backing_objects, + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (object->cache == cache) goto object_already_extant; @@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie, { struct fscache_object *p; struct fscache_cache *cache = object->cache; - struct hlist_node *_n; int ret; _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); @@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* there may be multiple initial creations of this object, but we only * want one */ ret = -EEXIST; - hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { + hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { if (p->cache == object->cache) { if (p->state >= FSCACHE_OBJECT_DYING) ret = -ENOBUFS; @@ -331,7 +329,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, /* pin the parent object */ spin_lock_nested(&cookie->parent->lock, 1); - hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, + hlist_for_each_entry(p, &cookie->parent->backing_objects, cookie_link) { if (p->cache == object->cache) { if (p->state >= FSCACHE_OBJECT_DYING) { @@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate); void __fscache_update_cookie(struct fscache_cookie *cookie) { struct fscache_object *object; - struct hlist_node *_p; fscache_stat(&fscache_n_updates); @@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) spin_lock(&cookie->lock); /* update the index entry on disk in each cache backing this cookie */ - hlist_for_each_entry(object, _p, + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); } diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile index 3cc0df730156..09d278bb7b91 100644 --- a/fs/hfsplus/Makefile +++ b/fs/hfsplus/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ - bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o - + bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ + attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c new file mode 100644 index 000000000000..8d691f124714 --- /dev/null +++ b/fs/hfsplus/attributes.c @@ -0,0 +1,399 @@ +/* + * linux/fs/hfsplus/attributes.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handling of records in attributes tree + */ + +#include "hfsplus_fs.h" +#include "hfsplus_raw.h" + +static struct kmem_cache *hfsplus_attr_tree_cachep; + +int hfsplus_create_attr_tree_cache(void) +{ + if (hfsplus_attr_tree_cachep) + return -EEXIST; + + hfsplus_attr_tree_cachep = + kmem_cache_create("hfsplus_attr_cache", + sizeof(hfsplus_attr_entry), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!hfsplus_attr_tree_cachep) + return -ENOMEM; + + return 0; +} + +void hfsplus_destroy_attr_tree_cache(void) +{ + kmem_cache_destroy(hfsplus_attr_tree_cachep); +} + +int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1, + const hfsplus_btree_key *k2) +{ + __be32 k1_cnid, k2_cnid; + + k1_cnid = k1->attr.cnid; + k2_cnid = k2->attr.cnid; + if (k1_cnid != k2_cnid) + return be32_to_cpu(k1_cnid) < be32_to_cpu(k2_cnid) ? -1 : 1; + + return hfsplus_strcmp( + (const struct hfsplus_unistr *)&k1->attr.key_name, + (const struct hfsplus_unistr *)&k2->attr.key_name); +} + +int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key, + u32 cnid, const char *name) +{ + int len; + + memset(key, 0, sizeof(struct hfsplus_attr_key)); + key->attr.cnid = cpu_to_be32(cnid); + if (name) { + len = strlen(name); + if (len > HFSPLUS_ATTR_MAX_STRLEN) { + printk(KERN_ERR "hfs: invalid xattr name's length\n"); + return -EINVAL; + } + hfsplus_asc2uni(sb, + (struct hfsplus_unistr *)&key->attr.key_name, + HFSPLUS_ATTR_MAX_STRLEN, name, len); + len = be16_to_cpu(key->attr.key_name.length); + } else { + key->attr.key_name.length = 0; + len = 0; + } + + /* The length of the key, as stored in key_len field, does not include + * the size of the key_len field itself. + * So, offsetof(hfsplus_attr_key, key_name) is a trick because + * it takes into consideration key_len field (__be16) of + * hfsplus_attr_key structure instead of length field (__be16) of + * hfsplus_attr_unistr structure. + */ + key->key_len = + cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) + + 2 * len); + + return 0; +} + +void hfsplus_attr_build_key_uni(hfsplus_btree_key *key, + u32 cnid, + struct hfsplus_attr_unistr *name) +{ + int ustrlen; + + memset(key, 0, sizeof(struct hfsplus_attr_key)); + ustrlen = be16_to_cpu(name->length); + key->attr.cnid = cpu_to_be32(cnid); + key->attr.key_name.length = cpu_to_be16(ustrlen); + ustrlen *= 2; + memcpy(key->attr.key_name.unicode, name->unicode, ustrlen); + + /* The length of the key, as stored in key_len field, does not include + * the size of the key_len field itself. + * So, offsetof(hfsplus_attr_key, key_name) is a trick because + * it takes into consideration key_len field (__be16) of + * hfsplus_attr_key structure instead of length field (__be16) of + * hfsplus_attr_unistr structure. + */ + key->key_len = + cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) + + ustrlen); +} + +hfsplus_attr_entry *hfsplus_alloc_attr_entry(void) +{ + return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL); +} + +void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry) +{ + if (entry) + kmem_cache_free(hfsplus_attr_tree_cachep, entry); +} + +#define HFSPLUS_INVALID_ATTR_RECORD -1 + +static int hfsplus_attr_build_record(hfsplus_attr_entry *entry, int record_type, + u32 cnid, const void *value, size_t size) +{ + if (record_type == HFSPLUS_ATTR_FORK_DATA) { + /* + * Mac OS X supports only inline data attributes. + * Do nothing + */ + memset(entry, 0, sizeof(*entry)); + return sizeof(struct hfsplus_attr_fork_data); + } else if (record_type == HFSPLUS_ATTR_EXTENTS) { + /* + * Mac OS X supports only inline data attributes. + * Do nothing. + */ + memset(entry, 0, sizeof(*entry)); + return sizeof(struct hfsplus_attr_extents); + } else if (record_type == HFSPLUS_ATTR_INLINE_DATA) { + u16 len; + + memset(entry, 0, sizeof(struct hfsplus_attr_inline_data)); + entry->inline_data.record_type = cpu_to_be32(record_type); + if (size <= HFSPLUS_MAX_INLINE_DATA_SIZE) + len = size; + else + return HFSPLUS_INVALID_ATTR_RECORD; + entry->inline_data.length = cpu_to_be16(len); + memcpy(entry->inline_data.raw_bytes, value, len); + /* + * Align len on two-byte boundary. + * It needs to add pad byte if we have odd len. + */ + len = round_up(len, 2); + return offsetof(struct hfsplus_attr_inline_data, raw_bytes) + + len; + } else /* invalid input */ + memset(entry, 0, sizeof(*entry)); + + return HFSPLUS_INVALID_ATTR_RECORD; +} + +int hfsplus_find_attr(struct super_block *sb, u32 cnid, + const char *name, struct hfs_find_data *fd) +{ + int err = 0; + + dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid); + + if (!HFSPLUS_SB(sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + if (name) { + err = hfsplus_attr_build_key(sb, fd->search_key, cnid, name); + if (err) + goto failed_find_attr; + err = hfs_brec_find(fd, hfs_find_rec_by_key); + if (err) + goto failed_find_attr; + } else { + err = hfsplus_attr_build_key(sb, fd->search_key, cnid, NULL); + if (err) + goto failed_find_attr; + err = hfs_brec_find(fd, hfs_find_1st_rec_by_cnid); + if (err) + goto failed_find_attr; + } + +failed_find_attr: + return err; +} + +int hfsplus_attr_exists(struct inode *inode, const char *name) +{ + int err = 0; + struct super_block *sb = inode->i_sb; + struct hfs_find_data fd; + + if (!HFSPLUS_SB(sb)->attr_tree) + return 0; + + err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); + if (err) + return 0; + + err = hfsplus_find_attr(sb, inode->i_ino, name, &fd); + if (err) + goto attr_not_found; + + hfs_find_exit(&fd); + return 1; + +attr_not_found: + hfs_find_exit(&fd); + return 0; +} + +int hfsplus_create_attr(struct inode *inode, + const char *name, + const void *value, size_t size) +{ + struct super_block *sb = inode->i_sb; + struct hfs_find_data fd; + hfsplus_attr_entry *entry_ptr; + int entry_size; + int err; + + dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n", + name ? name : NULL, inode->i_ino); + + if (!HFSPLUS_SB(sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + entry_ptr = hfsplus_alloc_attr_entry(); + if (!entry_ptr) + return -ENOMEM; + + err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); + if (err) + goto failed_init_create_attr; + + if (name) { + err = hfsplus_attr_build_key(sb, fd.search_key, + inode->i_ino, name); + if (err) + goto failed_create_attr; + } else { + err = -EINVAL; + goto failed_create_attr; + } + + /* Mac OS X supports only inline data attributes. */ + entry_size = hfsplus_attr_build_record(entry_ptr, + HFSPLUS_ATTR_INLINE_DATA, + inode->i_ino, + value, size); + if (entry_size == HFSPLUS_INVALID_ATTR_RECORD) { + err = -EINVAL; + goto failed_create_attr; + } + + err = hfs_brec_find(&fd, hfs_find_rec_by_key); + if (err != -ENOENT) { + if (!err) + err = -EEXIST; + goto failed_create_attr; + } + + err = hfs_brec_insert(&fd, entry_ptr, entry_size); + if (err) + goto failed_create_attr; + + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY); + +failed_create_attr: + hfs_find_exit(&fd); + +failed_init_create_attr: + hfsplus_destroy_attr_entry(entry_ptr); + return err; +} + +static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, + struct hfs_find_data *fd) +{ + int err = 0; + __be32 found_cnid, record_type; + + hfs_bnode_read(fd->bnode, &found_cnid, + fd->keyoffset + + offsetof(struct hfsplus_attr_key, cnid), + sizeof(__be32)); + if (cnid != be32_to_cpu(found_cnid)) + return -ENOENT; + + hfs_bnode_read(fd->bnode, &record_type, + fd->entryoffset, sizeof(record_type)); + + switch (be32_to_cpu(record_type)) { + case HFSPLUS_ATTR_INLINE_DATA: + /* All is OK. Do nothing. */ + break; + case HFSPLUS_ATTR_FORK_DATA: + case HFSPLUS_ATTR_EXTENTS: + printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + return -EOPNOTSUPP; + default: + printk(KERN_ERR "hfs: invalid extended attribute record\n"); + return -ENOENT; + } + + err = hfs_brec_remove(fd); + if (err) + return err; + + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY); + return err; +} + +int hfsplus_delete_attr(struct inode *inode, const char *name) +{ + int err = 0; + struct super_block *sb = inode->i_sb; + struct hfs_find_data fd; + + dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n", + name ? name : NULL, inode->i_ino); + + if (!HFSPLUS_SB(sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd); + if (err) + return err; + + if (name) { + err = hfsplus_attr_build_key(sb, fd.search_key, + inode->i_ino, name); + if (err) + goto out; + } else { + printk(KERN_ERR "hfs: invalid extended attribute name\n"); + err = -EINVAL; + goto out; + } + + err = hfs_brec_find(&fd, hfs_find_rec_by_key); + if (err) + goto out; + + err = __hfsplus_delete_attr(inode, inode->i_ino, &fd); + if (err) + goto out; + +out: + hfs_find_exit(&fd); + return err; +} + +int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid) +{ + int err = 0; + struct hfs_find_data fd; + + dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid); + + if (!HFSPLUS_SB(dir->i_sb)->attr_tree) { + printk(KERN_ERR "hfs: attributes file doesn't exist\n"); + return -EINVAL; + } + + err = hfs_find_init(HFSPLUS_SB(dir->i_sb)->attr_tree, &fd); + if (err) + return err; + + for (;;) { + err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd); + if (err) { + if (err != -ENOENT) + printk(KERN_ERR "hfs: xattr search failed.\n"); + goto end_delete_all; + } + + err = __hfsplus_delete_attr(dir, cnid, &fd); + if (err) + goto end_delete_all; + } + +end_delete_all: + hfs_find_exit(&fd); + return err; +} diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index 5d799c13205f..d73c98d1ee99 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c @@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->key = ptr + tree->max_key_len + 2; dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - mutex_lock(&tree->tree_lock); + switch (tree->cnid) { + case HFSPLUS_CAT_CNID: + mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX); + break; + case HFSPLUS_EXT_CNID: + mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX); + break; + case HFSPLUS_ATTR_CNID: + mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX); + break; + default: + BUG(); + } return 0; } @@ -38,15 +50,73 @@ void hfs_find_exit(struct hfs_find_data *fd) fd->tree = NULL; } -/* Find the record in bnode that best matches key (not greater than...)*/ -int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) +int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode, + struct hfs_find_data *fd, + int *begin, + int *end, + int *cur_rec) +{ + __be32 cur_cnid, search_cnid; + + if (bnode->tree->cnid == HFSPLUS_EXT_CNID) { + cur_cnid = fd->key->ext.cnid; + search_cnid = fd->search_key->ext.cnid; + } else if (bnode->tree->cnid == HFSPLUS_CAT_CNID) { + cur_cnid = fd->key->cat.parent; + search_cnid = fd->search_key->cat.parent; + } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) { + cur_cnid = fd->key->attr.cnid; + search_cnid = fd->search_key->attr.cnid; + } else + BUG(); + + if (cur_cnid == search_cnid) { + (*end) = (*cur_rec); + if ((*begin) == (*end)) + return 1; + } else { + if (be32_to_cpu(cur_cnid) < be32_to_cpu(search_cnid)) + (*begin) = (*cur_rec) + 1; + else + (*end) = (*cur_rec) - 1; + } + + return 0; +} + +int hfs_find_rec_by_key(struct hfs_bnode *bnode, + struct hfs_find_data *fd, + int *begin, + int *end, + int *cur_rec) { int cmpval; + + cmpval = bnode->tree->keycmp(fd->key, fd->search_key); + if (!cmpval) { + (*end) = (*cur_rec); + return 1; + } + if (cmpval < 0) + (*begin) = (*cur_rec) + 1; + else + *(end) = (*cur_rec) - 1; + + return 0; +} + +/* Find the record in bnode that best matches key (not greater than...)*/ +int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd, + search_strategy_t rec_found) +{ u16 off, len, keylen; int rec; int b, e; int res; + if (!rec_found) + BUG(); + b = 0; e = bnode->num_recs - 1; res = -ENOENT; @@ -59,17 +129,12 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); - cmpval = bnode->tree->keycmp(fd->key, fd->search_key); - if (!cmpval) { - e = rec; + if (rec_found(bnode, fd, &b, &e, &rec)) { res = 0; goto done; } - if (cmpval < 0) - b = rec + 1; - else - e = rec - 1; } while (b <= e); + if (rec != e && e >= 0) { len = hfs_brec_lenoff(bnode, e, &off); keylen = hfs_brec_keylen(bnode, e); @@ -79,19 +144,21 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) } hfs_bnode_read(bnode, fd->key, off, keylen); } + done: fd->record = e; fd->keyoffset = off; fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; + fail: return res; } /* Traverse a B*Tree from the root to a leaf finding best fit to key */ /* Return allocated copy of node found, set recnum to best record */ -int hfs_brec_find(struct hfs_find_data *fd) +int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare) { struct hfs_btree *tree; struct hfs_bnode *bnode; @@ -122,7 +189,7 @@ int hfs_brec_find(struct hfs_find_data *fd) goto invalid; bnode->parent = parent; - res = __hfs_brec_find(bnode, fd); + res = __hfs_brec_find(bnode, fd, do_key_compare); if (!height) break; if (fd->record < 0) @@ -149,7 +216,7 @@ int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len) { int res; - res = hfs_brec_find(fd); + res = hfs_brec_find(fd, hfs_find_rec_by_key); if (res) return res; if (fd->entrylength > rec_len) diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 1c42cc5b899f..f31ac6f404f1 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -62,7 +62,8 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) tree = node->tree; if (node->type == HFS_NODE_LEAF || - tree->attributes & HFS_TREE_VARIDXKEYS) + tree->attributes & HFS_TREE_VARIDXKEYS || + node->tree->cnid == HFSPLUS_ATTR_CNID) key_len = hfs_bnode_read_u16(node, off) + 2; else key_len = tree->max_key_len + 2; @@ -314,7 +315,8 @@ void hfs_bnode_dump(struct hfs_bnode *node) if (i && node->type == HFS_NODE_INDEX) { int tmp; - if (node->tree->attributes & HFS_TREE_VARIDXKEYS) + if (node->tree->attributes & HFS_TREE_VARIDXKEYS || + node->tree->cnid == HFSPLUS_ATTR_CNID) tmp = hfs_bnode_read_u16(node, key_off) + 2; else tmp = node->tree->max_key_len + 2; @@ -646,6 +648,8 @@ void hfs_bnode_put(struct hfs_bnode *node) if (test_bit(HFS_BNODE_DELETED, &node->flags)) { hfs_bnode_unhash(node); spin_unlock(&tree->hash_lock); + hfs_bnode_clear(node, 0, + PAGE_CACHE_SIZE * tree->pages_per_bnode); hfs_bmap_free(node); hfs_bnode_free(node); return; diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 2a734cfccc92..298d4e45604b 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -36,7 +36,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) return 0; if ((node->type == HFS_NODE_INDEX) && - !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) { + !(node->tree->attributes & HFS_TREE_VARIDXKEYS) && + (node->tree->cnid != HFSPLUS_ATTR_CNID)) { retval = node->tree->max_key_len + 2; } else { recoff = hfs_bnode_read_u16(node, @@ -151,12 +152,13 @@ skip: /* get index key */ hfs_bnode_read_key(new_node, fd->search_key, 14); - __hfs_brec_find(fd->bnode, fd); + __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key); hfs_bnode_put(new_node); new_node = NULL; - if (tree->attributes & HFS_TREE_VARIDXKEYS) + if ((tree->attributes & HFS_TREE_VARIDXKEYS) || + (tree->cnid == HFSPLUS_ATTR_CNID)) key_len = be16_to_cpu(fd->search_key->key_len) + 2; else { fd->search_key->key_len = @@ -201,7 +203,7 @@ again: hfs_bnode_put(node); node = fd->bnode = parent; - __hfs_brec_find(node, fd); + __hfs_brec_find(node, fd, hfs_find_rec_by_key); goto again; } hfs_bnode_write_u16(node, @@ -367,12 +369,13 @@ again: parent = hfs_bnode_find(tree, node->parent); if (IS_ERR(parent)) return PTR_ERR(parent); - __hfs_brec_find(parent, fd); + __hfs_brec_find(parent, fd, hfs_find_rec_by_key); hfs_bnode_dump(parent); rec = fd->record; /* size difference between old and new key */ - if (tree->attributes & HFS_TREE_VARIDXKEYS) + if ((tree->attributes & HFS_TREE_VARIDXKEYS) || + (tree->cnid == HFSPLUS_ATTR_CNID)) newkeylen = hfs_bnode_read_u16(node, 14) + 2; else fd->keylength = newkeylen = tree->max_key_len + 2; @@ -427,7 +430,7 @@ skip: hfs_bnode_read_key(new_node, fd->search_key, 14); cnid = cpu_to_be32(new_node->this); - __hfs_brec_find(fd->bnode, fd); + __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key); hfs_brec_insert(fd, &cnid, sizeof(cnid)); hfs_bnode_put(fd->bnode); hfs_bnode_put(new_node); @@ -495,13 +498,15 @@ static int hfs_btree_inc_height(struct hfs_btree *tree) /* insert old root idx into new root */ node->parent = tree->root; if (node->type == HFS_NODE_LEAF || - tree->attributes & HFS_TREE_VARIDXKEYS) + tree->attributes & HFS_TREE_VARIDXKEYS || + tree->cnid == HFSPLUS_ATTR_CNID) key_size = hfs_bnode_read_u16(node, 14) + 2; else key_size = tree->max_key_len + 2; hfs_bnode_copy(new_node, 14, node, 14, key_size); - if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { + if (!(tree->attributes & HFS_TREE_VARIDXKEYS) && + (tree->cnid != HFSPLUS_ATTR_CNID)) { key_size = tree->max_key_len + 2; hfs_bnode_write_u16(new_node, 14, tree->max_key_len); } diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 685d07d0ed18..efb689c21a95 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -98,6 +98,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); } break; + case HFSPLUS_ATTR_CNID: + if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) { + printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n", + tree->max_key_len); + goto fail_page; + } + tree->keycmp = hfsplus_attr_bin_cmp_key; + break; default: printk(KERN_ERR "hfs: unknown B*Tree requested\n"); goto fail_page; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 798d9c4c5e71..840d71edd193 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -45,7 +45,8 @@ void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, key->cat.parent = cpu_to_be32(parent); if (str) { - hfsplus_asc2uni(sb, &key->cat.name, str->name, str->len); + hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, + str->name, str->len); len = be16_to_cpu(key->cat.name.length); } else { key->cat.name.length = 0; @@ -167,7 +168,8 @@ static int hfsplus_fill_cat_thread(struct super_block *sb, entry->type = cpu_to_be16(type); entry->thread.reserved = 0; entry->thread.parentID = cpu_to_be32(parentid); - hfsplus_asc2uni(sb, &entry->thread.nodeName, str->name, str->len); + hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, + str->name, str->len); return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; } @@ -198,7 +200,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), &tmp.thread.nodeName); - return hfs_brec_find(fd); + return hfs_brec_find(fd, hfs_find_rec_by_key); } int hfsplus_create_cat(u32 cnid, struct inode *dir, @@ -221,7 +223,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, S_ISDIR(inode->i_mode) ? HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, dir->i_ino, str); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; @@ -233,7 +235,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); entry_size = hfsplus_cat_build_record(&entry, cnid, inode); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { /* panic? */ if (!err) @@ -253,7 +255,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, err1: hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - if (!hfs_brec_find(&fd)) + if (!hfs_brec_find(&fd, hfs_find_rec_by_key)) hfs_brec_remove(&fd); err2: hfs_find_exit(&fd); @@ -279,7 +281,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) int len; hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -296,7 +298,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) } else hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -326,7 +328,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) goto out; hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -337,6 +339,12 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dir->i_size--; dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); + + if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) { + if (HFSPLUS_SB(sb)->attr_tree) + hfsplus_delete_all_attrs(dir, cnid); + } + out: hfs_find_exit(&fd); @@ -363,7 +371,7 @@ int hfsplus_rename_cat(u32 cnid, /* find the old dir entry and read the data */ hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); - err = hfs_brec_find(&src_fd); + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { @@ -376,7 +384,7 @@ int hfsplus_rename_cat(u32 cnid, /* create new dir entry with the data from the old entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); - err = hfs_brec_find(&dst_fd); + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; @@ -391,7 +399,7 @@ int hfsplus_rename_cat(u32 cnid, /* finally remove the old entry */ hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); - err = hfs_brec_find(&src_fd); + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; err = hfs_brec_remove(&src_fd); @@ -402,7 +410,7 @@ int hfsplus_rename_cat(u32 cnid, /* remove old thread entry */ hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); - err = hfs_brec_find(&src_fd); + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset); @@ -414,7 +422,7 @@ int hfsplus_rename_cat(u32 cnid, hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); - err = hfs_brec_find(&dst_fd); + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) err = -EEXIST; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 074e04589248..031c24e50521 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -15,6 +15,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" +#include "xattr.h" static inline void hfsplus_instantiate(struct dentry *dentry, struct inode *inode, u32 cnid) @@ -138,7 +139,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (err) return err; hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); - err = hfs_brec_find(&fd); + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -421,6 +422,15 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, if (res) goto out_err; + res = hfsplus_init_inode_security(inode, dir, &dentry->d_name); + if (res == -EOPNOTSUPP) + res = 0; /* Operation is not supported. */ + else if (res) { + /* Try to delete anyway without error analysis. */ + hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); + goto out_err; + } + hfsplus_instantiate(dentry, inode, inode->i_ino); mark_inode_dirty(inode); goto out; @@ -450,15 +460,26 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, mode, rdev); res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); - if (res) { - clear_nlink(inode); - hfsplus_delete_inode(inode); - iput(inode); - goto out; + if (res) + goto failed_mknod; + + res = hfsplus_init_inode_security(inode, dir, &dentry->d_name); + if (res == -EOPNOTSUPP) + res = 0; /* Operation is not supported. */ + else if (res) { + /* Try to delete anyway without error analysis. */ + hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); + goto failed_mknod; } hfsplus_instantiate(dentry, inode, inode->i_ino); mark_inode_dirty(inode); + goto out; + +failed_mknod: + clear_nlink(inode); + hfsplus_delete_inode(inode); + iput(inode); out: mutex_unlock(&sbi->vh_mutex); return res; @@ -499,15 +520,19 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, } const struct inode_operations hfsplus_dir_inode_operations = { - .lookup = hfsplus_lookup, - .create = hfsplus_create, - .link = hfsplus_link, - .unlink = hfsplus_unlink, - .mkdir = hfsplus_mkdir, - .rmdir = hfsplus_rmdir, - .symlink = hfsplus_symlink, - .mknod = hfsplus_mknod, - .rename = hfsplus_rename, + .lookup = hfsplus_lookup, + .create = hfsplus_create, + .link = hfsplus_link, + .unlink = hfsplus_unlink, + .mkdir = hfsplus_mkdir, + .rmdir = hfsplus_rmdir, + .symlink = hfsplus_symlink, + .mknod = hfsplus_mknod, + .rename = hfsplus_rename, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = hfsplus_listxattr, + .removexattr = hfsplus_removexattr, }; const struct file_operations hfsplus_dir_operations = { diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index eba76eab6d62..a94f0f779d5e 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -95,7 +95,7 @@ static void __hfsplus_ext_write_extent(struct inode *inode, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); - res = hfs_brec_find(fd); + res = hfs_brec_find(fd, hfs_find_rec_by_key); if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) return; @@ -154,7 +154,7 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, hfsplus_ext_build_key(fd->search_key, cnid, block, type); fd->key->ext.cnid = 0; - res = hfs_brec_find(fd); + res = hfs_brec_find(fd, hfs_find_rec_by_key); if (res && res != -ENOENT) return res; if (fd->key->ext.cnid != fd->search_key->ext.cnid || diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a6da86b1b4c1..05b11f36024c 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -23,6 +23,7 @@ #define DBG_SUPER 0x00000010 #define DBG_EXTENT 0x00000020 #define DBG_BITMAP 0x00000040 +#define DBG_ATTR_MOD 0x00000080 #if 0 #define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) @@ -46,6 +47,13 @@ typedef int (*btree_keycmp)(const hfsplus_btree_key *, #define NODE_HASH_SIZE 256 +/* B-tree mutex nested subclasses */ +enum hfsplus_btree_mutex_classes { + CATALOG_BTREE_MUTEX, + EXTENTS_BTREE_MUTEX, + ATTR_BTREE_MUTEX, +}; + /* An HFS+ BTree held in memory */ struct hfs_btree { struct super_block *sb; @@ -223,6 +231,7 @@ struct hfsplus_inode_info { #define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */ #define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */ #define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */ +#define HFSPLUS_I_ATTR_DIRTY 4 /* has changes in the attributes tree */ #define HFSPLUS_IS_RSRC(inode) \ test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags) @@ -302,7 +311,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) #define hfs_brec_remove hfsplus_brec_remove #define hfs_find_init hfsplus_find_init #define hfs_find_exit hfsplus_find_exit -#define __hfs_brec_find __hplusfs_brec_find +#define __hfs_brec_find __hfsplus_brec_find #define hfs_brec_find hfsplus_brec_find #define hfs_brec_read hfsplus_brec_read #define hfs_brec_goto hfsplus_brec_goto @@ -324,10 +333,33 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) */ #define HFSPLUS_IOC_BLESS _IO('h', 0x80) +typedef int (*search_strategy_t)(struct hfs_bnode *, + struct hfs_find_data *, + int *, int *, int *); + /* * Functions in any *.c used in other files */ +/* attributes.c */ +int hfsplus_create_attr_tree_cache(void); +void hfsplus_destroy_attr_tree_cache(void); +hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); +void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); +int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *, + const hfsplus_btree_key *); +int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *, + u32, const char *); +void hfsplus_attr_build_key_uni(hfsplus_btree_key *key, + u32 cnid, + struct hfsplus_attr_unistr *name); +int hfsplus_find_attr(struct super_block *, u32, + const char *, struct hfs_find_data *); +int hfsplus_attr_exists(struct inode *inode, const char *name); +int hfsplus_create_attr(struct inode *, const char *, const void *, size_t); +int hfsplus_delete_attr(struct inode *, const char *); +int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid); + /* bitmap.c */ int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); int hfsplus_block_free(struct super_block *, u32, u32); @@ -369,8 +401,15 @@ int hfs_brec_remove(struct hfs_find_data *); /* bfind.c */ int hfs_find_init(struct hfs_btree *, struct hfs_find_data *); void hfs_find_exit(struct hfs_find_data *); -int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *); -int hfs_brec_find(struct hfs_find_data *); +int hfs_find_1st_rec_by_cnid(struct hfs_bnode *, + struct hfs_find_data *, + int *, int *, int *); +int hfs_find_rec_by_key(struct hfs_bnode *, + struct hfs_find_data *, + int *, int *, int *); +int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *, + search_strategy_t); +int hfs_brec_find(struct hfs_find_data *, search_strategy_t); int hfs_brec_read(struct hfs_find_data *, void *, int); int hfs_brec_goto(struct hfs_find_data *, int); @@ -417,11 +456,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, /* ioctl.c */ long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -int hfsplus_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size); -ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); /* options.c */ int hfsplus_parse_options(char *, struct hfsplus_sb_info *); @@ -446,7 +480,7 @@ int hfsplus_strcmp(const struct hfsplus_unistr *, int hfsplus_uni2asc(struct super_block *, const struct hfsplus_unistr *, char *, int *); int hfsplus_asc2uni(struct super_block *, - struct hfsplus_unistr *, const char *, int); + struct hfsplus_unistr *, int, const char *, int); int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, struct qstr *str); int hfsplus_compare_dentry(const struct dentry *parent, diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 921967e5abb1..452ede01b036 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -52,13 +52,23 @@ typedef __be32 hfsplus_cnid; typedef __be16 hfsplus_unichr; +#define HFSPLUS_MAX_STRLEN 255 +#define HFSPLUS_ATTR_MAX_STRLEN 127 + /* A "string" as used in filenames, etc. */ struct hfsplus_unistr { __be16 length; - hfsplus_unichr unicode[255]; + hfsplus_unichr unicode[HFSPLUS_MAX_STRLEN]; } __packed; -#define HFSPLUS_MAX_STRLEN 255 +/* + * A "string" is used in attributes file + * for name of extended attribute + */ +struct hfsplus_attr_unistr { + __be16 length; + hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN]; +} __packed; /* POSIX permissions */ struct hfsplus_perm { @@ -291,6 +301,8 @@ struct hfsplus_cat_file { /* File attribute bits */ #define HFSPLUS_FILE_LOCKED 0x0001 #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 +#define HFSPLUS_XATTR_EXISTS 0x0004 +#define HFSPLUS_ACL_EXISTS 0x0008 /* HFS+ catalog thread (part of a cat_entry) */ struct hfsplus_cat_thread { @@ -327,11 +339,63 @@ struct hfsplus_ext_key { #define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) +#define HFSPLUS_XATTR_FINDER_INFO_NAME "com.apple.FinderInfo" +#define HFSPLUS_XATTR_ACL_NAME "com.apple.system.Security" + +#define HFSPLUS_ATTR_INLINE_DATA 0x10 +#define HFSPLUS_ATTR_FORK_DATA 0x20 +#define HFSPLUS_ATTR_EXTENTS 0x30 + +/* HFS+ attributes tree key */ +struct hfsplus_attr_key { + __be16 key_len; + __be16 pad; + hfsplus_cnid cnid; + __be32 start_block; + struct hfsplus_attr_unistr key_name; +} __packed; + +#define HFSPLUS_ATTR_KEYLEN sizeof(struct hfsplus_attr_key) + +/* HFS+ fork data attribute */ +struct hfsplus_attr_fork_data { + __be32 record_type; + __be32 reserved; + struct hfsplus_fork_raw the_fork; +} __packed; + +/* HFS+ extension attribute */ +struct hfsplus_attr_extents { + __be32 record_type; + __be32 reserved; + struct hfsplus_extent extents; +} __packed; + +#define HFSPLUS_MAX_INLINE_DATA_SIZE 3802 + +/* HFS+ attribute inline data */ +struct hfsplus_attr_inline_data { + __be32 record_type; + __be32 reserved1; + u8 reserved2[6]; + __be16 length; + u8 raw_bytes[HFSPLUS_MAX_INLINE_DATA_SIZE]; +} __packed; + +/* A data record in the attributes tree */ +typedef union { + __be32 record_type; + struct hfsplus_attr_fork_data fork_data; + struct hfsplus_attr_extents extents; + struct hfsplus_attr_inline_data inline_data; +} __packed hfsplus_attr_entry; + /* HFS+ generic BTree key */ typedef union { __be16 key_len; struct hfsplus_cat_key cat; struct hfsplus_ext_key ext; + struct hfsplus_attr_key attr; } __packed hfsplus_btree_key; #endif diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index dcd05be5344b..160ccc9cdb4b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -17,6 +17,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" +#include "xattr.h" static int hfsplus_readpage(struct file *file, struct page *page) { @@ -348,6 +349,18 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, error = error2; } + if (test_and_clear_bit(HFSPLUS_I_ATTR_DIRTY, &hip->flags)) { + if (sbi->attr_tree) { + error2 = + filemap_write_and_wait( + sbi->attr_tree->inode->i_mapping); + if (!error) + error = error2; + } else { + printk(KERN_ERR "hfs: sync non-existent attributes tree\n"); + } + } + if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) { error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); if (!error) @@ -365,9 +378,10 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .setattr = hfsplus_setattr, - .setxattr = hfsplus_setxattr, - .getxattr = hfsplus_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = hfsplus_listxattr, + .removexattr = hfsplus_removexattr, }; static const struct file_operations hfsplus_file_operations = { diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index e3c4c4209428..d3ff5cc317d7 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -16,7 +16,6 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/sched.h> -#include <linux/xattr.h> #include <asm/uaccess.h> #include "hfsplus_fs.h" @@ -151,110 +150,3 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -ENOTTY; } } - -int hfsplus_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ - struct inode *inode = dentry->d_inode; - struct hfs_find_data fd; - hfsplus_cat_entry entry; - struct hfsplus_cat_file *file; - int res; - - if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) - return -EOPNOTSUPP; - - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - if (res) - return res; - res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (res) - goto out; - hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, - sizeof(struct hfsplus_cat_file)); - file = &entry.file; - - if (!strcmp(name, "hfs.type")) { - if (size == 4) - memcpy(&file->user_info.fdType, value, 4); - else - res = -ERANGE; - } else if (!strcmp(name, "hfs.creator")) { - if (size == 4) - memcpy(&file->user_info.fdCreator, value, 4); - else - res = -ERANGE; - } else - res = -EOPNOTSUPP; - if (!res) { - hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, - sizeof(struct hfsplus_cat_file)); - hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); - } -out: - hfs_find_exit(&fd); - return res; -} - -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) -{ - struct inode *inode = dentry->d_inode; - struct hfs_find_data fd; - hfsplus_cat_entry entry; - struct hfsplus_cat_file *file; - ssize_t res = 0; - - if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) - return -EOPNOTSUPP; - - if (size) { - res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - if (res) - return res; - res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (res) - goto out; - hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, - sizeof(struct hfsplus_cat_file)); - } - file = &entry.file; - - if (!strcmp(name, "hfs.type")) { - if (size >= 4) { - memcpy(value, &file->user_info.fdType, 4); - res = 4; - } else - res = size ? -ERANGE : 4; - } else if (!strcmp(name, "hfs.creator")) { - if (size >= 4) { - memcpy(value, &file->user_info.fdCreator, 4); - res = 4; - } else - res = size ? -ERANGE : 4; - } else - res = -EOPNOTSUPP; -out: - if (size) - hfs_find_exit(&fd); - return res; -} - -#define HFSPLUS_ATTRLIST_SIZE (sizeof("hfs.creator")+sizeof("hfs.type")) - -ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - struct inode *inode = dentry->d_inode; - - if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) - return -EOPNOTSUPP; - - if (!buffer || !size) - return HFSPLUS_ATTRLIST_SIZE; - if (size < HFSPLUS_ATTRLIST_SIZE) - return -ERANGE; - strcpy(buffer, "hfs.type"); - strcpy(buffer + sizeof("hfs.type"), "hfs.creator"); - - return HFSPLUS_ATTRLIST_SIZE; -} diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 796198d26553..974c26f96fae 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -20,6 +20,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb); static void hfsplus_destroy_inode(struct inode *inode); #include "hfsplus_fs.h" +#include "xattr.h" static int hfsplus_system_read_inode(struct inode *inode) { @@ -118,6 +119,7 @@ static int hfsplus_system_write_inode(struct inode *inode) case HFSPLUS_ATTR_CNID: fork = &vhdr->attr_file; tree = sbi->attr_tree; + break; default: return -EIO; } @@ -191,6 +193,12 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); if (!error) error = error2; + if (sbi->attr_tree) { + error2 = + filemap_write_and_wait(sbi->attr_tree->inode->i_mapping); + if (!error) + error = error2; + } error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); if (!error) error = error2; @@ -281,6 +289,7 @@ static void hfsplus_put_super(struct super_block *sb) hfsplus_sync_fs(sb, 1); } + hfs_btree_close(sbi->attr_tree); hfs_btree_close(sbi->cat_tree); hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); @@ -477,12 +486,20 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_ERR "hfs: failed to load catalog file\n"); goto out_close_ext_tree; } + if (vhdr->attr_file.total_blocks != 0) { + sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); + if (!sbi->attr_tree) { + printk(KERN_ERR "hfs: failed to load attributes file\n"); + goto out_close_cat_tree; + } + } + sb->s_xattr = hfsplus_xattr_handlers; inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { printk(KERN_ERR "hfs: failed to load allocation file\n"); err = PTR_ERR(inode); - goto out_close_cat_tree; + goto out_close_attr_tree; } sbi->alloc_file = inode; @@ -542,10 +559,27 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) } err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, sbi->hidden_dir); - mutex_unlock(&sbi->vh_mutex); - if (err) + if (err) { + mutex_unlock(&sbi->vh_mutex); + goto out_put_hidden_dir; + } + + err = hfsplus_init_inode_security(sbi->hidden_dir, + root, &str); + if (err == -EOPNOTSUPP) + err = 0; /* Operation is not supported. */ + else if (err) { + /* + * Try to delete anyway without + * error analysis. + */ + hfsplus_delete_cat(sbi->hidden_dir->i_ino, + root, &str); + mutex_unlock(&sbi->vh_mutex); goto out_put_hidden_dir; + } + mutex_unlock(&sbi->vh_mutex); hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); } @@ -562,6 +596,8 @@ out_put_root: sb->s_root = NULL; out_put_alloc_file: iput(sbi->alloc_file); +out_close_attr_tree: + hfs_btree_close(sbi->attr_tree); out_close_cat_tree: hfs_btree_close(sbi->cat_tree); out_close_ext_tree: @@ -635,9 +671,20 @@ static int __init init_hfsplus_fs(void) hfsplus_init_once); if (!hfsplus_inode_cachep) return -ENOMEM; + err = hfsplus_create_attr_tree_cache(); + if (err) + goto destroy_inode_cache; err = register_filesystem(&hfsplus_fs_type); if (err) - kmem_cache_destroy(hfsplus_inode_cachep); + goto destroy_attr_tree_cache; + return 0; + +destroy_attr_tree_cache: + hfsplus_destroy_attr_tree_cache(); + +destroy_inode_cache: + kmem_cache_destroy(hfsplus_inode_cachep); + return err; } @@ -650,6 +697,7 @@ static void __exit exit_hfsplus_fs(void) * destroy cache. */ rcu_barrier(); + hfsplus_destroy_attr_tree_cache(); kmem_cache_destroy(hfsplus_inode_cachep); } diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index a32998f29f0b..2c2e47dcfdd8 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -295,7 +295,8 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size) return hfsplus_decompose_table + (off / 4); } -int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, +int hfsplus_asc2uni(struct super_block *sb, + struct hfsplus_unistr *ustr, int max_unistr_len, const char *astr, int len) { int size, dsize, decompose; @@ -303,7 +304,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, wchar_t c; decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); - while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { + while (outlen < max_unistr_len && len > 0) { size = asc2unichar(sb, astr, len, &c); if (decompose) @@ -311,7 +312,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, else dstr = NULL; if (dstr) { - if (outlen + dsize > HFSPLUS_MAX_STRLEN) + if (outlen + dsize > max_unistr_len) break; do { ustr->unicode[outlen++] = cpu_to_be16(*dstr++); diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c new file mode 100644 index 000000000000..e8a4b0815c61 --- /dev/null +++ b/fs/hfsplus/xattr.c @@ -0,0 +1,709 @@ +/* + * linux/fs/hfsplus/xattr.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Logic of processing extended attributes + */ + +#include "hfsplus_fs.h" +#include "xattr.h" + +const struct xattr_handler *hfsplus_xattr_handlers[] = { + &hfsplus_xattr_osx_handler, + &hfsplus_xattr_user_handler, + &hfsplus_xattr_trusted_handler, + &hfsplus_xattr_security_handler, + NULL +}; + +static int strcmp_xattr_finder_info(const char *name) +{ + if (name) { + return strncmp(name, HFSPLUS_XATTR_FINDER_INFO_NAME, + sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME)); + } + return -1; +} + +static int strcmp_xattr_acl(const char *name) +{ + if (name) { + return strncmp(name, HFSPLUS_XATTR_ACL_NAME, + sizeof(HFSPLUS_XATTR_ACL_NAME)); + } + return -1; +} + +static inline int is_known_namespace(const char *name) +{ + if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + return false; + + return true; +} + +static int can_set_xattr(struct inode *inode, const char *name, + const void *value, size_t value_len) +{ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return -EOPNOTSUPP; /* TODO: implement ACL support */ + + if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { + /* + * This makes sure that we aren't trying to set an + * attribute in a different namespace by prefixing it + * with "osx." + */ + if (is_known_namespace(name + XATTR_MAC_OSX_PREFIX_LEN)) + return -EOPNOTSUPP; + + return 0; + } + + /* + * Don't allow setting an attribute in an unknown namespace. + */ + if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + return -EOPNOTSUPP; + + return 0; +} + +int __hfsplus_setxattr(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int err = 0; + struct hfs_find_data cat_fd; + hfsplus_cat_entry entry; + u16 cat_entry_flags, cat_entry_type; + u16 folder_finderinfo_len = sizeof(struct DInfo) + + sizeof(struct DXInfo); + u16 file_finderinfo_len = sizeof(struct FInfo) + + sizeof(struct FXInfo); + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + err = can_set_xattr(inode, name, value, size); + if (err) + return err; + + if (strncmp(name, XATTR_MAC_OSX_PREFIX, + XATTR_MAC_OSX_PREFIX_LEN) == 0) + name += XATTR_MAC_OSX_PREFIX_LEN; + + if (value == NULL) { + value = ""; + size = 0; + } + + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return err; + } + + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: catalog searching failed\n"); + goto end_setxattr; + } + + if (!strcmp_xattr_finder_info(name)) { + if (flags & XATTR_CREATE) { + printk(KERN_ERR "hfs: xattr exists yet\n"); + err = -EOPNOTSUPP; + goto end_setxattr; + } + hfs_bnode_read(cat_fd.bnode, &entry, cat_fd.entryoffset, + sizeof(hfsplus_cat_entry)); + if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) { + if (size == folder_finderinfo_len) { + memcpy(&entry.folder.user_info, value, + folder_finderinfo_len); + hfs_bnode_write(cat_fd.bnode, &entry, + cat_fd.entryoffset, + sizeof(struct hfsplus_cat_folder)); + hfsplus_mark_inode_dirty(inode, + HFSPLUS_I_CAT_DIRTY); + } else { + err = -ERANGE; + goto end_setxattr; + } + } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) { + if (size == file_finderinfo_len) { + memcpy(&entry.file.user_info, value, + file_finderinfo_len); + hfs_bnode_write(cat_fd.bnode, &entry, + cat_fd.entryoffset, + sizeof(struct hfsplus_cat_file)); + hfsplus_mark_inode_dirty(inode, + HFSPLUS_I_CAT_DIRTY); + } else { + err = -ERANGE; + goto end_setxattr; + } + } else { + err = -EOPNOTSUPP; + goto end_setxattr; + } + goto end_setxattr; + } + + if (!HFSPLUS_SB(inode->i_sb)->attr_tree) { + err = -EOPNOTSUPP; + goto end_setxattr; + } + + if (hfsplus_attr_exists(inode, name)) { + if (flags & XATTR_CREATE) { + printk(KERN_ERR "hfs: xattr exists yet\n"); + err = -EOPNOTSUPP; + goto end_setxattr; + } + err = hfsplus_delete_attr(inode, name); + if (err) + goto end_setxattr; + err = hfsplus_create_attr(inode, name, value, size); + if (err) + goto end_setxattr; + } else { + if (flags & XATTR_REPLACE) { + printk(KERN_ERR "hfs: cannot replace xattr\n"); + err = -EOPNOTSUPP; + goto end_setxattr; + } + err = hfsplus_create_attr(inode, name, value, size); + if (err) + goto end_setxattr; + } + + cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset); + if (cat_entry_type == HFSPLUS_FOLDER) { + cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode, + cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags)); + cat_entry_flags |= HFSPLUS_XATTR_EXISTS; + if (!strcmp_xattr_acl(name)) + cat_entry_flags |= HFSPLUS_ACL_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags), + cat_entry_flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else if (cat_entry_type == HFSPLUS_FILE) { + cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode, + cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags)); + cat_entry_flags |= HFSPLUS_XATTR_EXISTS; + if (!strcmp_xattr_acl(name)) + cat_entry_flags |= HFSPLUS_ACL_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags), + cat_entry_flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else { + printk(KERN_ERR "hfs: invalid catalog entry type\n"); + err = -EIO; + goto end_setxattr; + } + +end_setxattr: + hfs_find_exit(&cat_fd); + return err; +} + +static inline int is_osx_xattr(const char *xattr_name) +{ + return !is_known_namespace(xattr_name); +} + +static int name_len(const char *xattr_name, int xattr_name_len) +{ + int len = xattr_name_len + 1; + + if (is_osx_xattr(xattr_name)) + len += XATTR_MAC_OSX_PREFIX_LEN; + + return len; +} + +static int copy_name(char *buffer, const char *xattr_name, int name_len) +{ + int len = name_len; + int offset = 0; + + if (is_osx_xattr(xattr_name)) { + strncpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN); + offset += XATTR_MAC_OSX_PREFIX_LEN; + len += XATTR_MAC_OSX_PREFIX_LEN; + } + + strncpy(buffer + offset, xattr_name, name_len); + memset(buffer + offset + name_len, 0, 1); + len += 1; + + return len; +} + +static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, + void *value, size_t size) +{ + ssize_t res = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + u16 entry_type; + u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); + u16 file_rec_len = sizeof(struct FInfo) + sizeof(struct FXInfo); + u16 record_len = max(folder_rec_len, file_rec_len); + u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)]; + u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)]; + + if (size >= record_len) { + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + if (res) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return res; + } + res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (res) + goto end_getxattr_finder_info; + entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset); + + if (entry_type == HFSPLUS_FOLDER) { + hfs_bnode_read(fd.bnode, folder_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_folder, user_info), + folder_rec_len); + memcpy(value, folder_finder_info, folder_rec_len); + res = folder_rec_len; + } else if (entry_type == HFSPLUS_FILE) { + hfs_bnode_read(fd.bnode, file_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_file, user_info), + file_rec_len); + memcpy(value, file_finder_info, file_rec_len); + res = file_rec_len; + } else { + res = -EOPNOTSUPP; + goto end_getxattr_finder_info; + } + } else + res = size ? -ERANGE : record_len; + +end_getxattr_finder_info: + if (size >= record_len) + hfs_find_exit(&fd); + return res; +} + +ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + hfsplus_attr_entry *entry; + __be32 xattr_record_type; + u32 record_type; + u16 record_length = 0; + ssize_t res = 0; + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + if (strncmp(name, XATTR_MAC_OSX_PREFIX, + XATTR_MAC_OSX_PREFIX_LEN) == 0) { + /* skip "osx." prefix */ + name += XATTR_MAC_OSX_PREFIX_LEN; + /* + * Don't allow retrieving properly prefixed attributes + * by prepending them with "osx." + */ + if (is_known_namespace(name)) + return -EOPNOTSUPP; + } + + if (!strcmp_xattr_finder_info(name)) + return hfsplus_getxattr_finder_info(dentry, value, size); + + if (!HFSPLUS_SB(inode->i_sb)->attr_tree) + return -EOPNOTSUPP; + + entry = hfsplus_alloc_attr_entry(); + if (!entry) { + printk(KERN_ERR "hfs: can't allocate xattr entry\n"); + return -ENOMEM; + } + + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); + if (res) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + goto failed_getxattr_init; + } + + res = hfsplus_find_attr(inode->i_sb, inode->i_ino, name, &fd); + if (res) { + if (res == -ENOENT) + res = -ENODATA; + else + printk(KERN_ERR "hfs: xattr searching failed\n"); + goto out; + } + + hfs_bnode_read(fd.bnode, &xattr_record_type, + fd.entryoffset, sizeof(xattr_record_type)); + record_type = be32_to_cpu(xattr_record_type); + if (record_type == HFSPLUS_ATTR_INLINE_DATA) { + record_length = hfs_bnode_read_u16(fd.bnode, + fd.entryoffset + + offsetof(struct hfsplus_attr_inline_data, + length)); + if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) { + printk(KERN_ERR "hfs: invalid xattr record size\n"); + res = -EIO; + goto out; + } + } else if (record_type == HFSPLUS_ATTR_FORK_DATA || + record_type == HFSPLUS_ATTR_EXTENTS) { + printk(KERN_ERR "hfs: only inline data xattr are supported\n"); + res = -EOPNOTSUPP; + goto out; + } else { + printk(KERN_ERR "hfs: invalid xattr record\n"); + res = -EIO; + goto out; + } + + if (size) { + hfs_bnode_read(fd.bnode, entry, fd.entryoffset, + offsetof(struct hfsplus_attr_inline_data, + raw_bytes) + record_length); + } + + if (size >= record_length) { + memcpy(value, entry->inline_data.raw_bytes, record_length); + res = record_length; + } else + res = size ? -ERANGE : record_length; + +out: + hfs_find_exit(&fd); + +failed_getxattr_init: + hfsplus_destroy_attr_entry(entry); + return res; +} + +static inline int can_list(const char *xattr_name) +{ + if (!xattr_name) + return 0; + + return strncmp(xattr_name, XATTR_TRUSTED_PREFIX, + XATTR_TRUSTED_PREFIX_LEN) || + capable(CAP_SYS_ADMIN); +} + +static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, + char *buffer, size_t size) +{ + ssize_t res = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + u16 entry_type; + u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)]; + u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)]; + unsigned long len, found_bit; + int xattr_name_len, symbols_count; + + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + if (res) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return res; + } + + res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (res) + goto end_listxattr_finder_info; + + entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset); + if (entry_type == HFSPLUS_FOLDER) { + len = sizeof(struct DInfo) + sizeof(struct DXInfo); + hfs_bnode_read(fd.bnode, folder_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_folder, user_info), + len); + found_bit = find_first_bit((void *)folder_finder_info, len*8); + } else if (entry_type == HFSPLUS_FILE) { + len = sizeof(struct FInfo) + sizeof(struct FXInfo); + hfs_bnode_read(fd.bnode, file_finder_info, + fd.entryoffset + + offsetof(struct hfsplus_cat_file, user_info), + len); + found_bit = find_first_bit((void *)file_finder_info, len*8); + } else { + res = -EOPNOTSUPP; + goto end_listxattr_finder_info; + } + + if (found_bit >= (len*8)) + res = 0; + else { + symbols_count = sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME) - 1; + xattr_name_len = + name_len(HFSPLUS_XATTR_FINDER_INFO_NAME, symbols_count); + if (!buffer || !size) { + if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) + res = xattr_name_len; + } else if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) { + if (size < xattr_name_len) + res = -ERANGE; + else { + res = copy_name(buffer, + HFSPLUS_XATTR_FINDER_INFO_NAME, + symbols_count); + } + } + } + +end_listxattr_finder_info: + hfs_find_exit(&fd); + + return res; +} + +ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + ssize_t err; + ssize_t res = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data fd; + u16 key_len = 0; + struct hfsplus_attr_key attr_key; + char strbuf[HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + int xattr_name_len; + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + res = hfsplus_listxattr_finder_info(dentry, buffer, size); + if (res < 0) + return res; + else if (!HFSPLUS_SB(inode->i_sb)->attr_tree) + return (res == 0) ? -EOPNOTSUPP : res; + + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd); + if (err) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return err; + } + + err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd); + if (err) { + if (err == -ENOENT) { + if (res == 0) + res = -ENODATA; + goto end_listxattr; + } else { + res = err; + goto end_listxattr; + } + } + + for (;;) { + key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset); + if (key_len == 0 || key_len > fd.tree->max_key_len) { + printk(KERN_ERR "hfs: invalid xattr key length: %d\n", + key_len); + res = -EIO; + goto end_listxattr; + } + + hfs_bnode_read(fd.bnode, &attr_key, + fd.keyoffset, key_len + sizeof(key_len)); + + if (be32_to_cpu(attr_key.cnid) != inode->i_ino) + goto end_listxattr; + + xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN; + if (hfsplus_uni2asc(inode->i_sb, + (const struct hfsplus_unistr *)&fd.key->attr.key_name, + strbuf, &xattr_name_len)) { + printk(KERN_ERR "hfs: unicode conversion failed\n"); + res = -EIO; + goto end_listxattr; + } + + if (!buffer || !size) { + if (can_list(strbuf)) + res += name_len(strbuf, xattr_name_len); + } else if (can_list(strbuf)) { + if (size < (res + name_len(strbuf, xattr_name_len))) { + res = -ERANGE; + goto end_listxattr; + } else + res += copy_name(buffer + res, + strbuf, xattr_name_len); + } + + if (hfs_brec_goto(&fd, 1)) + goto end_listxattr; + } + +end_listxattr: + hfs_find_exit(&fd); + return res; +} + +int hfsplus_removexattr(struct dentry *dentry, const char *name) +{ + int err = 0; + struct inode *inode = dentry->d_inode; + struct hfs_find_data cat_fd; + u16 flags; + u16 cat_entry_type; + int is_xattr_acl_deleted = 0; + int is_all_xattrs_deleted = 0; + + if ((!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) || + HFSPLUS_IS_RSRC(inode)) + return -EOPNOTSUPP; + + if (!HFSPLUS_SB(inode->i_sb)->attr_tree) + return -EOPNOTSUPP; + + err = can_set_xattr(inode, name, NULL, 0); + if (err) + return err; + + if (strncmp(name, XATTR_MAC_OSX_PREFIX, + XATTR_MAC_OSX_PREFIX_LEN) == 0) + name += XATTR_MAC_OSX_PREFIX_LEN; + + if (!strcmp_xattr_finder_info(name)) + return -EOPNOTSUPP; + + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: can't init xattr find struct\n"); + return err; + } + + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd); + if (err) { + printk(KERN_ERR "hfs: catalog searching failed\n"); + goto end_removexattr; + } + + err = hfsplus_delete_attr(inode, name); + if (err) + goto end_removexattr; + + is_xattr_acl_deleted = !strcmp_xattr_acl(name); + is_all_xattrs_deleted = !hfsplus_attr_exists(inode, NULL); + + if (!is_xattr_acl_deleted && !is_all_xattrs_deleted) + goto end_removexattr; + + cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset); + + if (cat_entry_type == HFSPLUS_FOLDER) { + flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags)); + if (is_xattr_acl_deleted) + flags &= ~HFSPLUS_ACL_EXISTS; + if (is_all_xattrs_deleted) + flags &= ~HFSPLUS_XATTR_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_folder, flags), + flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else if (cat_entry_type == HFSPLUS_FILE) { + flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags)); + if (is_xattr_acl_deleted) + flags &= ~HFSPLUS_ACL_EXISTS; + if (is_all_xattrs_deleted) + flags &= ~HFSPLUS_XATTR_EXISTS; + hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset + + offsetof(struct hfsplus_cat_file, flags), + flags); + hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY); + } else { + printk(KERN_ERR "hfs: invalid catalog entry type\n"); + err = -EIO; + goto end_removexattr; + } + +end_removexattr: + hfs_find_exit(&cat_fd); + return err; +} + +static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_MAC_OSX_PREFIX); + strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + + XATTR_MAC_OSX_PREFIX_LEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_MAC_OSX_PREFIX); + strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_osx_handler = { + .prefix = XATTR_MAC_OSX_PREFIX, + .list = hfsplus_osx_listxattr, + .get = hfsplus_osx_getxattr, + .set = hfsplus_osx_setxattr, +}; diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h new file mode 100644 index 000000000000..847b695b984d --- /dev/null +++ b/fs/hfsplus/xattr.h @@ -0,0 +1,60 @@ +/* + * linux/fs/hfsplus/xattr.h + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Logic of processing extended attributes + */ + +#ifndef _LINUX_HFSPLUS_XATTR_H +#define _LINUX_HFSPLUS_XATTR_H + +#include <linux/xattr.h> + +extern const struct xattr_handler hfsplus_xattr_osx_handler; +extern const struct xattr_handler hfsplus_xattr_user_handler; +extern const struct xattr_handler hfsplus_xattr_trusted_handler; +/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/ +/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/ +extern const struct xattr_handler hfsplus_xattr_security_handler; + +extern const struct xattr_handler *hfsplus_xattr_handlers[]; + +int __hfsplus_setxattr(struct inode *inode, const char *name, + const void *value, size_t size, int flags); + +static inline int hfsplus_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); +} + +ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size); + +ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); + +int hfsplus_removexattr(struct dentry *dentry, const char *name); + +int hfsplus_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr); + +static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir) +{ + /*TODO: implement*/ + return 0; +} + +static inline int hfsplus_init_inode_security(struct inode *inode, + struct inode *dir, + const struct qstr *qstr) +{ + int err; + + err = hfsplus_init_acl(inode, dir); + if (!err) + err = hfsplus_init_security(inode, dir, qstr); + return err; +} + +#endif diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c new file mode 100644 index 000000000000..83b842f113c5 --- /dev/null +++ b/fs/hfsplus/xattr_security.c @@ -0,0 +1,104 @@ +/* + * linux/fs/hfsplus/xattr_trusted.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for storing security labels as extended attributes. + */ + +#include <linux/security.h> +#include "hfsplus_fs.h" +#include "xattr.h" + +static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_SECURITY_PREFIX); + strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_security_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_SECURITY_PREFIX); + strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +static int hfsplus_initxattrs(struct inode *inode, + const struct xattr *xattr_array, + void *fs_info) +{ + const struct xattr *xattr; + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t xattr_name_len; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + xattr_name_len = strlen(xattr->name); + + if (xattr_name_len == 0) + continue; + + if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN > + HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_SECURITY_PREFIX); + strcpy(xattr_name + + XATTR_SECURITY_PREFIX_LEN, xattr->name); + memset(xattr_name + + XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1); + + err = __hfsplus_setxattr(inode, xattr_name, + xattr->value, xattr->value_len, 0); + if (err) + break; + } + return err; +} + +int hfsplus_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr) +{ + return security_inode_init_security(inode, dir, qstr, + &hfsplus_initxattrs, NULL); +} + +const struct xattr_handler hfsplus_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = hfsplus_security_listxattr, + .get = hfsplus_security_getxattr, + .set = hfsplus_security_setxattr, +}; diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c new file mode 100644 index 000000000000..426cee277542 --- /dev/null +++ b/fs/hfsplus/xattr_trusted.c @@ -0,0 +1,63 @@ +/* + * linux/fs/hfsplus/xattr_trusted.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for trusted extended attributes. + */ + +#include "hfsplus_fs.h" +#include "xattr.h" + +static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_TRUSTED_PREFIX); + strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_TRUSTED_PREFIX); + strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .list = hfsplus_trusted_listxattr, + .get = hfsplus_trusted_getxattr, + .set = hfsplus_trusted_setxattr, +}; diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c new file mode 100644 index 000000000000..e34016561ae0 --- /dev/null +++ b/fs/hfsplus/xattr_user.c @@ -0,0 +1,63 @@ +/* + * linux/fs/hfsplus/xattr_user.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for user extended attributes. + */ + +#include "hfsplus_fs.h" +#include "xattr.h" + +static int hfsplus_user_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_USER_PREFIX); + strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name); + + return hfsplus_getxattr(dentry, xattr_name, buffer, size); +} + +static int hfsplus_user_setxattr(struct dentry *dentry, const char *name, + const void *buffer, size_t size, int flags, int type) +{ + char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0}; + size_t len = strlen(name); + + if (!strcmp(name, "")) + return -EINVAL; + + if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN) + return -EOPNOTSUPP; + + strcpy(xattr_name, XATTR_USER_PREFIX); + strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name); + + return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); +} + +static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list, + size_t list_size, const char *name, size_t name_len, int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = hfsplus_user_listxattr, + .get = hfsplus_user_getxattr, + .set = hfsplus_user_setxattr, +}; diff --git a/fs/inode.c b/fs/inode.c index 67880e604399..f5f7c06c36fb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb, int (*test)(struct inode *, void *), void *data) { - struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + hlist_for_each_entry(inode, head, i_hash) { spin_lock(&inode->i_lock); if (inode->i_sb != sb) { spin_unlock(&inode->i_lock); @@ -830,11 +829,10 @@ repeat: static struct inode *find_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) { - struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + hlist_for_each_entry(inode, head, i_hash) { spin_lock(&inode->i_lock); if (inode->i_ino != ino) { spin_unlock(&inode->i_lock); @@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked); static int test_inode_iunique(struct super_block *sb, unsigned long ino) { struct hlist_head *b = inode_hashtable + hash(sb, ino); - struct hlist_node *node; struct inode *inode; spin_lock(&inode_hash_lock); - hlist_for_each_entry(inode, node, b, i_hash) { + hlist_for_each_entry(inode, b, i_hash) { if (inode->i_ino == ino && inode->i_sb == sb) { spin_unlock(&inode_hash_lock); return 0; @@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode) struct hlist_head *head = inode_hashtable + hash(sb, ino); while (1) { - struct hlist_node *node; struct inode *old = NULL; spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, node, head, i_hash) { + hlist_for_each_entry(old, head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) @@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode) } break; } - if (likely(!node)) { + if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); @@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, struct hlist_head *head = inode_hashtable + hash(sb, hashval); while (1) { - struct hlist_node *node; struct inode *old = NULL; spin_lock(&inode_hash_lock); - hlist_for_each_entry(old, node, head, i_hash) { + hlist_for_each_entry(old, head, i_hash) { if (old->i_sb != sb) continue; if (!test(old, data)) @@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } break; } - if (likely(!node)) { + if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; hlist_add_head(&inode->i_hash, head); diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index a2717408c478..0796c45d0d4d 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -11,7 +11,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/nfs_fs.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/kthread.h> @@ -220,10 +220,19 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; + struct nlm_rqst *req; struct file_lock *fl, *next; u32 nsmstate; struct net *net = host->net; + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + printk(KERN_ERR "lockd: reclaimer unable to alloc memory." + " Locks for %s won't be reclaimed!\n", + host->h_name); + return 0; + } + allow_signal(SIGKILL); down_write(&host->h_rwsem); @@ -253,7 +262,7 @@ restart: */ if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) != 0) + if (nlmclnt_reclaim(host, fl, req) != 0) continue; list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); if (host->h_nsmstate != nsmstate) { @@ -279,5 +288,6 @@ restart: /* Release host handle after use */ nlmclnt_release_host(host); lockd_down(net); + kfree(req); return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 366277190b82..7e529c3c45c0 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -618,17 +618,15 @@ out_unlock: * RECLAIM: Try to reclaim a lock */ int -nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) +nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl, + struct nlm_rqst *req) { - struct nlm_rqst reqst, *req; int status; - req = &reqst; memset(req, 0, sizeof(*req)); locks_init_lock(&req->a_args.lock.fl); locks_init_lock(&req->a_res.lock.fl); req->a_host = host; - req->a_flags = 0; /* Set up the argument struct */ nlmclnt_setlockargs(req, fl); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 0e17090c310f..969d589c848d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -13,6 +13,7 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/mutex.h> @@ -32,15 +33,15 @@ static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH]; static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH]; -#define for_each_host(host, pos, chain, table) \ +#define for_each_host(host, chain, table) \ for ((chain) = (table); \ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ - hlist_for_each_entry((host), (pos), (chain), h_hash) + hlist_for_each_entry((host), (chain), h_hash) -#define for_each_host_safe(host, pos, next, chain, table) \ +#define for_each_host_safe(host, next, chain, table) \ for ((chain) = (table); \ (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ - hlist_for_each_entry_safe((host), (pos), (next), \ + hlist_for_each_entry_safe((host), (next), \ (chain), h_hash) static unsigned long nrhosts; @@ -225,7 +226,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .net = net, }; struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; struct nsm_handle *nsm = NULL; struct lockd_net *ln = net_generic(net, lockd_net_id); @@ -237,7 +237,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, mutex_lock(&nlm_host_mutex); chain = &nlm_client_hosts[nlm_hash_address(sap)]; - hlist_for_each_entry(host, pos, chain, h_hash) { + hlist_for_each_entry(host, chain, h_hash) { if (host->net != net) continue; if (!rpc_cmp_addr(nlm_addr(host), sap)) @@ -322,7 +322,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, const size_t hostname_len) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host = NULL; struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); @@ -350,7 +349,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, nlm_gc_hosts(net); chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; - hlist_for_each_entry(host, pos, chain, h_hash) { + hlist_for_each_entry(host, chain, h_hash) { if (host->net != net) continue; if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) @@ -515,10 +514,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, { struct nlm_host *host; struct hlist_head *chain; - struct hlist_node *pos; mutex_lock(&nlm_host_mutex); - for_each_host(host, pos, chain, cache) { + for_each_host(host, chain, cache) { if (host->h_nsmhandle == nsm && host->h_nsmstate != info->state) { host->h_nsmstate = info->state; @@ -570,7 +568,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info) static void nlm_complain_hosts(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; if (net) { @@ -587,7 +584,7 @@ static void nlm_complain_hosts(struct net *net) dprintk("lockd: %lu hosts left:\n", nrhosts); } - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; dprintk(" %s (cnt %d use %d exp %ld net %p)\n", @@ -600,14 +597,13 @@ void nlm_shutdown_hosts_net(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos; struct nlm_host *host; mutex_lock(&nlm_host_mutex); /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts in net %p...\n", net); - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; host->h_expires = jiffies - 1; @@ -644,11 +640,11 @@ static void nlm_gc_hosts(struct net *net) { struct hlist_head *chain; - struct hlist_node *pos, *next; + struct hlist_node *next; struct nlm_host *host; dprintk("lockd: host garbage collection for net %p\n", net); - for_each_host(host, pos, chain, nlm_server_hosts) { + for_each_host(host, chain, nlm_server_hosts) { if (net && host->net != net) continue; host->h_inuse = 0; @@ -657,7 +653,7 @@ nlm_gc_hosts(struct net *net) /* Mark all hosts that hold locks, blocks or shares */ nlmsvc_mark_resources(net); - for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { + for_each_host_safe(host, next, chain, nlm_server_hosts) { if (net && host->net != net) continue; if (atomic_read(&host->h_count) || host->h_inuse diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3c2cfc683631..1812f026960c 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index b3a24b07d981..97e87415b145 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -13,7 +13,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/sunrpc/svc.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/nfsd/nfsfh.h> #include <linux/nfsd/export.h> #include <linux/lockd/lockd.h> @@ -84,7 +84,6 @@ __be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, struct nfs_fh *f) { - struct hlist_node *pos; struct nlm_file *file; unsigned int hash; __be32 nfserr; @@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, /* Lock file table */ mutex_lock(&nlm_file_mutex); - hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) + hlist_for_each_entry(file, &nlm_files[hash], f_list) if (!nfs_compare_fh(&file->f_handle, f)) goto found; @@ -248,13 +247,13 @@ static int nlm_traverse_files(void *data, nlm_host_match_fn_t match, int (*is_failover_file)(void *data, struct nlm_file *file)) { - struct hlist_node *pos, *next; + struct hlist_node *next; struct nlm_file *file; int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { - hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { + hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) { if (is_failover_file && !is_failover_file(data, file)) continue; file->f_count++; diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 862a2f16db64..5f7b053720ee 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) struct super_block *pipefs_sb; int ret = 0; + sunrpc_init_cache_detail(cd); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { ret = nfs_cache_register_sb(pipefs_sb, cd); rpc_put_sb_net(net); + if (ret) + sunrpc_destroy_cache_detail(cd); } return ret; } @@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) nfs_cache_unregister_sb(pipefs_sb, cd); rpc_put_sb_net(net); } -} - -void nfs_cache_init(struct cache_detail *cd) -{ - sunrpc_init_cache_detail(cd); -} - -void nfs_cache_destroy(struct cache_detail *cd) -{ sunrpc_destroy_cache_detail(cd); } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 317db95e37f8..4116d2c3f52f 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern void nfs_cache_init(struct cache_detail *cd); -extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); extern int nfs_cache_register_sb(struct super_block *sb, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9f3c66438d0e..84d8eae203a7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -197,7 +197,6 @@ error_0: EXPORT_SYMBOL_GPL(nfs_alloc_client); #if IS_ENABLED(CONFIG_NFS_V4) -/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index ca4b11ec87a2..945527092295 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -42,6 +43,7 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #include <linux/seq_file.h> #include <linux/inet.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -142,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); + ret = sunrpc_cache_pipe_upcall(cd, ch); return ret; } @@ -351,60 +353,47 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, } EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); +static struct cache_detail nfs_dns_resolve_template = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_request = nfs_dns_request, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + + int nfs_dns_resolver_cache_init(struct net *net) { - int err = -ENOMEM; + int err; struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd; - struct cache_head **tbl; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), - GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = NFS_DNS_HASHTBL_SIZE, - cd->hash_table = tbl, - cd->name = "dns_resolve", - cd->cache_put = nfs_dns_ent_put, - cd->cache_upcall = nfs_dns_upcall, - cd->cache_parse = nfs_dns_parse, - cd->cache_show = nfs_dns_show, - cd->match = nfs_dns_match, - cd->init = nfs_dns_ent_init, - cd->update = nfs_dns_ent_update, - cd->alloc = nfs_dns_ent_alloc, - - nfs_cache_init(cd); - err = nfs_cache_register_net(net, cd); + nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net); + if (IS_ERR(nn->nfs_dns_resolve)) + return PTR_ERR(nn->nfs_dns_resolve); + + err = nfs_cache_register_net(net, nn->nfs_dns_resolve); if (err) goto err_reg; - nn->nfs_dns_resolve = cd; return 0; err_reg: - nfs_cache_destroy(cd); - kfree(cd->hash_table); -err_tbl: - kfree(cd); -err_cd: + cache_destroy_net(nn->nfs_dns_resolve, net); return err; } void nfs_dns_resolver_cache_destroy(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd = nn->nfs_dns_resolve; - nfs_cache_unregister_net(net, cd); - nfs_cache_destroy(cd); - kfree(cd->hash_table); - kfree(cd); + nfs_cache_unregister_net(net, nn->nfs_dns_resolve); + cache_destroy_net(nn->nfs_dns_resolve, net); } static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 2e9779b58b7a..ac4fc9a8fdbc 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -6,6 +6,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/nfs_mount.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/bc_xprt.h> @@ -29,15 +30,14 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; -retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + if (ret >= 0) + clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); - if (ret == -EAGAIN) - goto retry; - return ret; + idr_preload_end(); + return ret < 0 ? ret : 0; } #ifdef CONFIG_NFS_V4_1 diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b720064bcd7f..1fe284f01f8b 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -31,6 +31,7 @@ #include <linux/nfs_fs.h> #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/sunrpc/addr.h> #include "internal.h" #include "nfs4session.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 1e09eb78543b..0dd766079e1c 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/vfs.h> #include <linux/inet.h> #include "internal.h" diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index d35b62e83ea6..6da209bd9408 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->ld == ld && d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { if (atomic_read(&d->ref)) @@ -248,12 +247,11 @@ static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; HLIST_HEAD(tmp); spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); hlist_add_head(&d->tmpnode, &tmp); @@ -291,12 +289,11 @@ void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) { struct nfs4_deviceid_node *d; - struct hlist_node *n; int i; rcu_read_lock(); for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node) if (d->nfs_client == clp) set_bit(NFS_DEVICEID_INVALID, &d->flags); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a9dc5fc29955..17b32b722457 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -31,6 +31,7 @@ #include <linux/errno.h> #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/xprtsock.h> diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 93cc9d34c459..87fd1410b737 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -12,6 +12,10 @@ /* * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. */ struct svc_cacherep { struct hlist_node c_hash; @@ -20,11 +24,13 @@ struct svc_cacherep { unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; + struct sockaddr_in6 c_addr; __be32 c_xid; u32 c_prot; u32 c_proc; u32 c_vers; + unsigned int c_len; + __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; @@ -46,8 +52,7 @@ enum { enum { RC_DROPIT, RC_REPLY, - RC_DOIT, - RC_INTR + RC_DOIT }; /* @@ -67,6 +72,12 @@ enum { */ #define RC_DELAY (HZ/5) +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5681c5906f08..5f38ea36e266 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -67,11 +67,6 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); -} - static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); @@ -245,7 +240,7 @@ static struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -315,6 +310,7 @@ static void svc_export_put(struct kref *ref) path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } @@ -337,11 +333,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -674,6 +665,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; new->cd = item->cd; } @@ -715,7 +707,7 @@ static struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 497584c70366..d620e7f81429 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -9,7 +9,7 @@ #include <linux/debugfs.h> #include <linux/module.h> #include <linux/nsproxy.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <asm/uaccess.h> #include "state.h" diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 0ce12346df9c..4832fd819f88 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -140,12 +140,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); -} - -static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -192,7 +186,7 @@ static struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -321,12 +315,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); -} - -static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -365,7 +353,7 @@ static struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d1c5dba2bbb..ae73175e6e68 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -993,14 +993,15 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!buf) return nfserr_jukebox; + p = buf; status = nfsd4_encode_fattr(&cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, &p, + count, verify->ve_bmval, rqstp, 0); /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4914af4a817e..899ca26dd194 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1185,6 +1185,12 @@ bin_to_hex_dup(const unsigned char *src, int srclen) static int nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) { + /* XXX: The usermode helper s not working in container yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } return nfsd4_umh_cltrack_upcall("init", NULL, NULL); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9e7103b6e0ad..16d39c6c4fbb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -40,7 +40,7 @@ #include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" @@ -261,33 +261,46 @@ static inline int get_new_stid(struct nfs4_stid *stid) return new_stid; } -static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) { - stateid_t *s = &stid->sc_stateid; + struct idr *stateids = &cl->cl_stateids; + static int min_stateid = 0; + struct nfs4_stid *stid; int new_id; - stid->sc_type = type; + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + if (!idr_pre_get(stateids, GFP_KERNEL)) + goto out_free; + if (idr_get_new_above(stateids, stid, min_stateid, &new_id)) + goto out_free; stid->sc_client = cl; - s->si_opaque.so_clid = cl->cl_clientid; - new_id = get_new_stid(stid); - s->si_opaque.so_id = (u32)new_id; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - s->si_generation = 0; -} - -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) -{ - struct idr *stateids = &cl->cl_stateids; + stid->sc_stateid.si_generation = 0; - if (!idr_pre_get(stateids, GFP_KERNEL)) - return NULL; /* - * Note: if we fail here (or any time between now and the time - * we actually get the new idr), we won't need to undo the idr - * preallocation, since the idr code caps the number of - * preallocated entries. + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): */ - return kmem_cache_alloc(slab, GFP_KERNEL); + + min_stateid = new_id+1; + if (min_stateid == INT_MAX) + min_stateid = 0; + return stid; +out_free: + kfree(stid); + return NULL; } static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) @@ -316,7 +329,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); + dp->dl_stid.sc_type = NFS4_DELEG_STID; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -337,13 +350,21 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } +static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + free_stid(&dp->dl_stid, deleg_slab); num_delegations--; } } @@ -360,9 +381,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void unhash_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; - - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + s->sc_type = 0; } /* Called under the state lock. */ @@ -519,7 +538,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - kmem_cache_free(stateid_slab, stp); + free_stid(&stp->st_stid, stateid_slab); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -905,7 +924,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, new = __alloc_session(slotsize, numslots); if (!new) { - nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + nfsd4_put_drc_mem(slotsize, numslots); return NULL; } init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); @@ -1048,7 +1067,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { @@ -1060,6 +1079,7 @@ free_client(struct nfs4_client *clp) } free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_destroy(&clp->cl_stateids); kfree(clp); } @@ -1258,7 +1278,12 @@ static void gen_confirm(struct nfs4_client *clp) static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - return idr_find(&cl->cl_stateids, t->si_opaque.so_id); + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; } static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) @@ -1844,11 +1869,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); -out: nfs4_unlock_state(); +out: dprintk("%s returns %d\n", __func__, ntohl(status)); return status; out_free_conn: + nfs4_unlock_state(); free_conn(conn); out_free_session: __free_session(new); @@ -2443,9 +2469,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; - struct nfs4_client *clp = oo->oo_owner.so_client; - init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -4031,7 +4056,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp = nfs4_alloc_stateid(clp); if (stp == NULL) return NULL; - init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; @@ -4913,16 +4938,6 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - /* - * FIXME: For now, we hang most of the pernet global stuff off of - * init_net until nfsd is fully containerized. Eventually, we'll - * need to pass a net pointer into this function, take a reference - * to that instead and then do most of the rest of this on a per-net - * basis. - */ - if (net != &init_net) - return -EINVAL; - ret = nfs4_state_create_net(net); if (ret) return ret; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8ca6d17f6cf3..01168865dd37 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2024,12 +2024,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. + * countp is the buffer size in _words_ */ __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, + struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; @@ -2038,12 +2037,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct kstat stat; struct svc_fh tempfh; struct kstatfs statfs; - int buflen = *countp << 2; + int buflen = count << 2; __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; + __be32 *p = *buffer; __be32 status; int err; int aclsupport = 0; @@ -2447,7 +2446,7 @@ out_acl: } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + *buffer = p; status = nfs_ok; out: @@ -2459,7 +2458,6 @@ out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; out_serverfault: @@ -2478,7 +2476,7 @@ static inline int attributes_need_mount(u32 *bmval) static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 *p, int *buflen) + const char *name, int namlen, __be32 **p, int buflen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2584,10 +2582,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; @@ -2714,10 +2711,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + &resp->p, buflen, getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cbac34a55da..62c1ee128aeb 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,22 +9,22 @@ */ #include <linux/slab.h> +#include <linux/sunrpc/addr.h> +#include <linux/highmem.h> +#include <net/checksum.h> #include "nfsd.h" #include "cache.h" -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 - */ -#define CACHESIZE 1024 +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + #define HASHSIZE 64 static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; +static struct kmem_cache *drc_slab; +static unsigned int num_drc_entries; +static unsigned int max_drc_entries; /* * Calculate the hash index from an XID. @@ -37,6 +37,14 @@ static inline u32 request_hash(u32 xid) } static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); +static int nfsd_reply_cache_shrink(struct shrinker *shrink, + struct shrink_control *sc); + +struct shrinker nfsd_reply_cache_shrinker = { + .shrink = nfsd_reply_cache_shrink, + .seeks = 1, +}; /* * locking for the reply cache: @@ -44,30 +52,86 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); -int nfsd_reply_cache_init(void) +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. + */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; - int i; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - goto out_nomem; - list_add(&rp->c_lru, &lru_head); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); INIT_HLIST_NODE(&rp->c_hash); - i--; } + return rp; +} + +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF) + kfree(rp->c_replvec.iov_base); + hlist_del(&rp->c_hash); + list_del(&rp->c_lru); + --num_drc_entries; + kmem_cache_free(drc_slab, rp); +} + +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + +int nfsd_reply_cache_init(void) +{ + register_shrinker(&nfsd_reply_cache_shrinker); + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; - cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; - cache_disabled = 0; + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -79,27 +143,33 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kfree(rp); + nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; - kfree (cache_hash); cache_hash = NULL; + + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -112,83 +182,214 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } +static inline bool +nfsd_cache_entry_expired(struct svc_cacherep *rp) +{ + return rp->c_state != RC_INPROG && + time_after(jiffies, rp->c_timestamp + RC_EXPIRE); +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static void +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + if (!nfsd_cache_entry_expired(rp) && + num_drc_entries <= max_drc_entries) + break; + nfsd_reply_cache_free_locked(rp); + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. + */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +static int +nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned int num; + + spin_lock(&cache_lock); + if (sc->nr_to_scan) + prune_cache_entries(); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + +/* + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min_t(size_t, PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +/* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. + */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +{ + struct svc_cacherep *rp; + struct hlist_head *rh; + __be32 xid = rqstp->rq_xid; + u32 proto = rqstp->rq_prot, + vers = rqstp->rq_vers, + proc = rqstp->rq_proc; + + rh = &cache_hash[request_hash(xid)]; + hlist_for_each_entry(rp, rh, c_hash) { + if (xid == rp->c_xid && proc == rp->c_proc && + proto == rp->c_prot && vers == rp->c_vers && + rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && + rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && + rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) + return rp; + } + return NULL; +} + /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; - struct hlist_head *rh; - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; int rtn; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; return RC_DOIT; } + csum = nfsd_cache_csum(rqstp); + spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { - nfsdstats.rchits++; - goto found_entry; + rp = nfsd_cache_search(rqstp, csum); + if (rp) + goto found_entry; + + /* Try to use the first entry on the LRU */ + if (!list_empty(&lru_head)) { + rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); + if (nfsd_cache_entry_expired(rp) || + num_drc_entries >= max_drc_entries) { + lru_put_end(rp); + prune_cache_entries(); + goto setup_entry; } } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } + /* Drop the lock and allocate a new entry */ + spin_unlock(&cache_lock); + rp = nfsd_reply_cache_alloc(); + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); + return RC_DOIT; } + spin_lock(&cache_lock); + ++num_drc_entries; + + /* + * Must search again just in case someone inserted one + * after we dropped the lock above. + */ + found = nfsd_cache_search(rqstp, csum); + if (found) { + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; } - /* All entries on the LRU are in-progress. This should not happen */ - if (&rp->c_lru == &lru_head) { - static int complaints; - - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } - goto out; - } + /* + * We're keeping the one we just allocated. Are we now over the + * limit? Prune one off the tip of the LRU in trade for the one we + * just allocated if so. + */ + if (num_drc_entries >= max_drc_entries) + nfsd_reply_cache_free_locked(list_first_entry(&lru_head, + struct svc_cacherep, c_lru)); +setup_entry: + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -201,9 +402,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -232,7 +433,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; @@ -257,11 +458,11 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); @@ -269,7 +470,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -283,21 +484,21 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) cachv = &rp->c_replvec; cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); - rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); + nfsd_reply_cache_free(rp); return; } cachv->iov_len = len << 2; memcpy(cachv->iov_base, statp, len << 2); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2db7021b01ae..13a21c8fca49 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,7 +10,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -125,11 +125,11 @@ static const struct file_operations transaction_ops = { .llseek = default_llseek, }; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) @@ -140,8 +140,26 @@ static int exports_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations exports_operations = { - .open = exports_open, +static int exports_proc_open(struct inode *inode, struct file *file) +{ + return exports_net_open(current->nsproxy->net_ns, file); +} + +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -220,6 +238,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; /* sanity check */ if (size == 0) @@ -232,7 +251,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) + if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -317,6 +336,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -352,7 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(&init_net, dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -396,7 +416,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size > 0) { int newthreads; @@ -447,7 +467,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); @@ -510,7 +530,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { @@ -534,7 +554,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { - if (num < 4) + if (num != 4) return -EINVAL; minor = simple_strtoul(minorp+1, NULL, 0); if (minor == 0) @@ -792,7 +812,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, net); @@ -827,7 +847,7 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size > 0) { @@ -923,7 +943,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } @@ -939,7 +960,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } @@ -995,7 +1017,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); @@ -1013,7 +1036,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", @@ -1037,20 +1060,35 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + sb->s_fs_info = get_net(net); + return 0; } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, nfsd_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .mount = nfsd_mount, - .kill_sb = kill_litter_super, + .kill_sb = nfsd_umount, }; #ifdef CONFIG_PROC_FS @@ -1061,7 +1099,8 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = proc_create("exports", 0, entry, &exports_operations); + entry = proc_create("exports", 0, entry, + &exports_proc_operations); if (!entry) return -ENOMEM; return 0; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index be7af509930c..262df5ccbf59 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -652,7 +652,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Check whether we have this call in the cache. */ switch (nfsd_cache_lookup(rqstp)) { - case RC_INTR: case RC_DROPIT: return 0; case RC_REPLY: @@ -703,8 +702,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv == NULL) { @@ -721,7 +719,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); - struct net *net = &init_net; + struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0889bfb43dc9..546f8983ecf1 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -563,7 +563,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, + struct dentry *dentry, __be32 **buffer, int countp, u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 6baadb5a8430..4bb21d67d9b1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; - struct hlist_node *p; int watched; if (!S_ISDIR(inode->i_mode)) @@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index f31e90fc050d..74825be65b7b 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -36,12 +36,11 @@ static void fsnotify_recalc_inode_mask_locked(struct inode *inode) { struct fsnotify_mark *mark; - struct hlist_node *pos; __u32 new_mask = 0; assert_spin_locked(&inode->i_lock); - hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) + hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) new_mask |= mark->mask; inode->i_fsnotify_mask = new_mask; } @@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) void fsnotify_clear_marks_by_inode(struct inode *inode) { struct fsnotify_mark *mark, *lmark; - struct hlist_node *pos, *n; + struct hlist_node *n; LIST_HEAD(free_list); spin_lock(&inode->i_lock); - hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) { list_add(&mark->i.free_i_list, &free_list); hlist_del_init_rcu(&mark->i.i_list); fsnotify_get_mark(mark); @@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked( struct inode *inode) { struct fsnotify_mark *mark; - struct hlist_node *pos; assert_spin_locked(&inode->i_lock); - hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) { if (mark->group == group) { fsnotify_get_mark(mark); return mark; @@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, int allow_dups) { - struct fsnotify_mark *lmark; - struct hlist_node *node, *last = NULL; + struct fsnotify_mark *lmark, *last = NULL; int ret = 0; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) { - last = node; + hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) { + last = lmark; if ((lmark->group == group) && !allow_dups) { ret = -EEXIST; @@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(last, &mark->i.i_list); + hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); out: fsnotify_recalc_inode_mask_locked(inode); spin_unlock(&inode->i_lock); diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 871569c7d609..4216308b81b4 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -197,7 +197,6 @@ static void inotify_free_group_priv(struct fsnotify_group *group) { /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); - idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); atomic_dec(&group->inotify_data.user->inotify_devs); free_uid(group->inotify_data.user); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 07f7a92fe88e..e0f7c1241a6a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -364,22 +364,20 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, { int ret; - do { - if (unlikely(!idr_pre_get(idr, GFP_KERNEL))) - return -ENOMEM; + idr_preload(GFP_KERNEL); + spin_lock(idr_lock); - spin_lock(idr_lock); - ret = idr_get_new_above(idr, i_mark, *last_wd + 1, - &i_mark->wd); + ret = idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT); + if (ret >= 0) { /* we added the mark to the idr, take a reference */ - if (!ret) { - *last_wd = i_mark->wd; - fsnotify_get_mark(&i_mark->fsn_mark); - } - spin_unlock(idr_lock); - } while (ret == -EAGAIN); + i_mark->wd = ret; + *last_wd = i_mark->wd; + fsnotify_get_mark(&i_mark->fsn_mark); + } - return ret; + spin_unlock(idr_lock); + idr_preload_end(); + return ret < 0 ? ret : 0; } static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 4df58b8ea64a..68ca5a8704b5 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -33,12 +33,12 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { struct fsnotify_mark *mark, *lmark; - struct hlist_node *pos, *n; + struct hlist_node *n; struct mount *m = real_mount(mnt); LIST_HEAD(free_list); spin_lock(&mnt->mnt_root->d_lock); - hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) { list_add(&mark->m.free_m_list, &free_list); hlist_del_init_rcu(&mark->m.m_list); fsnotify_get_mark(mark); @@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) { struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; - struct hlist_node *pos; __u32 new_mask = 0; assert_spin_locked(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) + hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) new_mask |= mark->mask; m->mnt_fsnotify_mask = new_mask; } @@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_ { struct mount *m = real_mount(mnt); struct fsnotify_mark *mark; - struct hlist_node *pos; assert_spin_locked(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) { if (mark->group == group) { fsnotify_get_mark(mark); return mark; @@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, int allow_dups) { struct mount *m = real_mount(mnt); - struct fsnotify_mark *lmark; - struct hlist_node *node, *last = NULL; + struct fsnotify_mark *lmark, *last = NULL; int ret = 0; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) { - last = node; + hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) { + last = lmark; if ((lmark->group == group) && !allow_dups) { ret = -EEXIST; @@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ - hlist_add_after_rcu(last, &mark->m.m_list); + hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); out: fsnotify_recalc_vfsmount_mask_locked(mnt); spin_unlock(&mnt->mnt_root->d_lock); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 0d2bf566e39a..aa88bd8bcedc 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -304,28 +304,22 @@ static u8 o2net_num_from_nn(struct o2net_node *nn) static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw) { - int ret = 0; - - do { - if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) { - ret = -EAGAIN; - break; - } - spin_lock(&nn->nn_lock); - ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id); - if (ret == 0) - list_add_tail(&nsw->ns_node_item, - &nn->nn_status_list); - spin_unlock(&nn->nn_lock); - } while (ret == -EAGAIN); + int ret; - if (ret == 0) { - init_waitqueue_head(&nsw->ns_wq); - nsw->ns_sys_status = O2NET_ERR_NONE; - nsw->ns_status = 0; + spin_lock(&nn->nn_lock); + ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + nsw->ns_id = ret; + list_add_tail(&nsw->ns_node_item, &nn->nn_status_list); } + spin_unlock(&nn->nn_lock); + if (ret < 0) + return ret; - return ret; + init_waitqueue_head(&nsw->ns_wq); + nsw->ns_sys_status = O2NET_ERR_NONE; + nsw->ns_status = 0; + return 0; } static void o2net_complete_nsw_locked(struct o2net_node *nn, diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 8db4b58b2e4b..ef999729e274 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { - struct hlist_node *p; struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 01ebfd0bdad7..eeac97bb3bfa 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2083,7 +2083,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, u8 dead_node, u8 new_master) { int i; - struct hlist_node *hash_iter; struct hlist_head *bucket; struct dlm_lock_resource *res, *next; @@ -2114,7 +2113,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, * if necessary */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, hash_iter, bucket, hash_node) { + hlist_for_each_entry(res, bucket, hash_node) { if (!(res->state & DLM_LOCK_RES_RECOVERING)) continue; @@ -2273,7 +2272,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) { - struct hlist_node *iter; struct dlm_lock_resource *res; int i; struct hlist_head *bucket; @@ -2299,7 +2297,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); - hlist_for_each_entry(res, iter, bucket, hash_node) { + hlist_for_each_entry(res, bucket, hash_node) { /* always prune any $RECOVERY entries for dead nodes, * otherwise hangs can occur during later recovery */ if (dlm_is_recovery_lock(res->lockname.name, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f169da4624fd..b7e74b580c0f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (ac->ac_allow_chain_relink && + if (!ac->ac_disable_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; - ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b8afabfeede4..a36d0aa50911 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,7 +49,7 @@ struct ocfs2_alloc_context { /* these are used by the chain search */ u16 ac_chain; - int ac_allow_chain_relink; + int ac_disable_chain_relink; group_search_t *ac_group_search; u64 ac_last_group; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 0ba9ea1e7961..2e3ea308c144 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir, struct buffer_head *dir_bh = NULL; ret = ocfs2_init_security_get(inode, dir, qstr, NULL); - if (!ret) { + if (ret) { mlog_errno(ret); goto leave; } diff --git a/fs/proc/base.c b/fs/proc/base.c index f3b133d79914..69078c7cef1f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -73,6 +73,7 @@ #include <linux/security.h> #include <linux/ptrace.h> #include <linux/tracehook.h> +#include <linux/printk.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> @@ -952,7 +953,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, * /proc/pid/oom_adj is provided for legacy purposes, ask users to use * /proc/pid/oom_score_adj instead. */ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2983dc52ca25..4b3b3ffb52f1 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -15,6 +15,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/printk.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/idr.h> @@ -132,11 +133,8 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes, } if (start == NULL) { - if (n > PAGE_SIZE) { - printk(KERN_ERR - "proc_file_read: Apparent buffer overflow!\n"); + if (n > PAGE_SIZE) /* Apparent buffer overflow */ n = PAGE_SIZE; - } n -= *ppos; if (n <= 0) break; @@ -144,26 +142,19 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes, n = count; start = page + *ppos; } else if (start < page) { - if (n > PAGE_SIZE) { - printk(KERN_ERR - "proc_file_read: Apparent buffer overflow!\n"); + if (n > PAGE_SIZE) /* Apparent buffer overflow */ n = PAGE_SIZE; - } if (n > count) { /* * Don't reduce n because doing so might * cut off part of a data block. */ - printk(KERN_WARNING - "proc_file_read: Read count exceeded\n"); + pr_warn("proc_file_read: count exceeded\n"); } } else /* start >= page */ { unsigned long startoff = (unsigned long)(start - page); - if (n > (PAGE_SIZE - startoff)) { - printk(KERN_ERR - "proc_file_read: Apparent buffer overflow!\n"); + if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */ n = PAGE_SIZE - startoff; - } if (n > count) n = count; } @@ -569,7 +560,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp for (tmp = dir->subdir; tmp; tmp = tmp->next) if (strcmp(tmp->name, dp->name) == 0) { - WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", + WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); break; } @@ -830,9 +821,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " - "'%s/%s', leaking at least '%s'\n", __func__, - de->parent->name, de->name, de->subdir->name); + WARN(de->subdir, "%s: removing non-empty directory " + "'%s/%s', leaking at least '%s'\n", __func__, + de->parent->name, de->name, de->subdir->name); pde_put(de); } EXPORT_SYMBOL(remove_proc_entry); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 70322e1a4f0f..a86aebc9ba7c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -13,6 +13,7 @@ #include <linux/stat.h> #include <linux/completion.h> #include <linux/poll.h> +#include <linux/printk.h> #include <linux/file.h> #include <linux/limits.h> #include <linux/init.h> @@ -495,13 +496,13 @@ int proc_fill_super(struct super_block *s) pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); if (!root_inode) { - printk(KERN_ERR "proc_fill_super: get root inode failed\n"); + pr_err("proc_fill_super: get root inode failed\n"); return -ENOMEM; } s->s_root = d_make_root(root_inode); if (!s->s_root) { - printk(KERN_ERR "proc_fill_super: allocate dentry failed\n"); + pr_err("proc_fill_super: allocate dentry failed\n"); return -ENOMEM; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 252544c05207..85ff3a4598b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/proc_fs.h> +#include <linux/binfmts.h> struct ctl_table_header; struct mempolicy; @@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task) if (mm) dumpable = get_dumpable(mm); task_unlock(task); - if (dumpable == SUID_DUMPABLE_ENABLED) + if (dumpable == SUID_DUMP_USER) return 1; return 0; } diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e96d4f18ca3a..eda6f017f272 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -17,6 +17,7 @@ #include <linux/elfcore.h> #include <linux/vmalloc.h> #include <linux/highmem.h> +#include <linux/printk.h> #include <linux/bootmem.h> #include <linux/init.h> #include <linux/slab.h> @@ -619,7 +620,7 @@ static int __init proc_kcore_init(void) proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); if (!proc_root_kcore) { - printk(KERN_ERR "couldn't create /proc/kcore\n"); + pr_err("couldn't create /proc/kcore\n"); return 0; /* Always returns 0. */ } /* Store text area if it's special */ diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de20ec480fa0..30b590f5bd35 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -8,6 +8,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/printk.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/of.h> @@ -110,8 +111,8 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde, if (ent->data == oldprop) break; if (ent == NULL) { - printk(KERN_WARNING "device-tree: property \"%s\" " - " does not exist\n", oldprop->name); + pr_warn("device-tree: property \"%s\" does not exist\n", + oldprop->name); } else { ent->data = newprop; ent->size = newprop->length; @@ -153,8 +154,8 @@ static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, realloc: fixed_name = kmalloc(fixup_len, GFP_KERNEL); if (fixed_name == NULL) { - printk(KERN_ERR "device-tree: Out of memory trying to fixup " - "name \"%s\"\n", name); + pr_err("device-tree: Out of memory trying to fixup " + "name \"%s\"\n", name); return name; } @@ -175,8 +176,8 @@ retry: goto retry; } - printk(KERN_WARNING "device-tree: Duplicate name in %s, " - "renamed to \"%s\"\n", np->full_name, fixed_name); + pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", + np->full_name, fixed_name); return fixed_name; } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 612df79cc6a1..ac05f33a0dde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -5,6 +5,7 @@ #include <linux/sysctl.h> #include <linux/poll.h> #include <linux/proc_fs.h> +#include <linux/printk.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/namei.h> @@ -57,7 +58,7 @@ static void sysctl_print_dir(struct ctl_dir *dir) { if (dir->header.parent) sysctl_print_dir(dir->header.parent); - printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); + pr_cont("%s/", dir->header.ctl_table[0].procname); } static int namecmp(const char *name1, int len1, const char *name2, int len2) @@ -134,9 +135,9 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) else if (cmp > 0) p = &(*p)->rb_right; else { - printk(KERN_ERR "sysctl duplicate entry: "); + pr_err("sysctl duplicate entry: "); sysctl_print_dir(head->parent); - printk(KERN_CONT "/%s\n", entry->procname); + pr_cont("/%s\n", entry->procname); return -EEXIST; } } @@ -927,9 +928,9 @@ found: subdir->header.nreg++; failed: if (unlikely(IS_ERR(subdir))) { - printk(KERN_ERR "sysctl could not get directory: "); + pr_err("sysctl could not get directory: "); sysctl_print_dir(dir); - printk(KERN_CONT "/%*.*s %ld\n", + pr_cont("/%*.*s %ld\n", namelen, namelen, name, PTR_ERR(subdir)); } drop_sysctl_table(&dir->header); @@ -995,8 +996,8 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", - path, table->procname, &vaf); + pr_err("sysctl table check failed: %s/%s %pV\n", + path, table->procname, &vaf); va_end(args); return -EINVAL; @@ -1510,9 +1511,9 @@ static void put_links(struct ctl_table_header *header) drop_sysctl_table(link_head); } else { - printk(KERN_ERR "sysctl link missing during unregister: "); + pr_err("sysctl link missing during unregister: "); sysctl_print_dir(parent); - printk(KERN_CONT "/%s\n", name); + pr_cont("/%s\n", name); } } } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0d5071d29985..b870f740ab5a 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -15,6 +15,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/highmem.h> +#include <linux/printk.h> #include <linux/bootmem.h> #include <linux/init.h> #include <linux/crash_dump.h> @@ -175,15 +176,15 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); if (!curr_m) return -EINVAL; - if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) - tsz = buflen; - - /* Calculate left bytes in current memory segment. */ - nr_bytes = (curr_m->size - (start - curr_m->paddr)); - if (tsz > nr_bytes) - tsz = nr_bytes; while (buflen) { + tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); + + /* Calculate left bytes in current memory segment. */ + nr_bytes = (curr_m->size - (start - curr_m->paddr)); + if (tsz > nr_bytes) + tsz = nr_bytes; + tmp = read_from_oldmem(buffer, tsz, &start, 1); if (tmp < 0) return tmp; @@ -198,12 +199,6 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, struct vmcore, list); start = curr_m->paddr; } - if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) - tsz = buflen; - /* Calculate left bytes in current memory segment. */ - nr_bytes = (curr_m->size - (start - curr_m->paddr)); - if (tsz > nr_bytes) - tsz = nr_bytes; } return acc; } @@ -553,8 +548,7 @@ static int __init parse_crash_elf64_headers(void) ehdr.e_ehsize != sizeof(Elf64_Ehdr) || ehdr.e_phentsize != sizeof(Elf64_Phdr) || ehdr.e_phnum == 0) { - printk(KERN_WARNING "Warning: Core image elf header is not" - "sane\n"); + pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } @@ -609,8 +603,7 @@ static int __init parse_crash_elf32_headers(void) ehdr.e_ehsize != sizeof(Elf32_Ehdr) || ehdr.e_phentsize != sizeof(Elf32_Phdr) || ehdr.e_phnum == 0) { - printk(KERN_WARNING "Warning: Core image elf header is not" - "sane\n"); + pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } @@ -653,8 +646,7 @@ static int __init parse_crash_elf_headers(void) if (rc < 0) return rc; if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { - printk(KERN_WARNING "Warning: Core image elf header" - " not found\n"); + pr_warn("Warning: Core image elf header not found\n"); return -EINVAL; } @@ -673,8 +665,7 @@ static int __init parse_crash_elf_headers(void) /* Determine vmcore size. */ vmcore_size = get_vmcore_size_elf32(elfcorebuf); } else { - printk(KERN_WARNING "Warning: Core image elf header is not" - " sane\n"); + pr_warn("Warning: Core image elf header is not sane\n"); return -EINVAL; } return 0; @@ -690,7 +681,7 @@ static int __init vmcore_init(void) return rc; rc = parse_crash_elf_headers(); if (rc) { - printk(KERN_WARNING "Kdump: vmcore not initialized\n"); + pr_warn("Kdump: vmcore not initialized\n"); return rc; } diff --git a/fs/seq_file.c b/fs/seq_file.c index f2bc3dfd0b88..15c6304bab71 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -308,27 +308,27 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) mutex_lock(&m->lock); m->version = file->f_version; switch (whence) { - case 1: - offset += file->f_pos; - case 0: - if (offset < 0) - break; - retval = offset; - if (offset != m->read_pos) { - while ((retval=traverse(m, offset)) == -EAGAIN) - ; - if (retval) { - /* with extreme prejudice... */ - file->f_pos = 0; - m->read_pos = 0; - m->version = 0; - m->index = 0; - m->count = 0; - } else { - m->read_pos = offset; - retval = file->f_pos = offset; - } + case SEEK_CUR: + offset += file->f_pos; + case SEEK_SET: + if (offset < 0) + break; + retval = offset; + if (offset != m->read_pos) { + while ((retval = traverse(m, offset)) == -EAGAIN) + ; + if (retval) { + /* with extreme prejudice... */ + file->f_pos = 0; + m->read_pos = 0; + m->version = 0; + m->index = 0; + m->count = 0; + } else { + m->read_pos = offset; + retval = file->f_pos = offset; } + } } file->f_version = m->version; mutex_unlock(&m->lock); diff --git a/fs/super.c b/fs/super.c index 12f123712161..7465d4364208 100644 --- a/fs/super.c +++ b/fs/super.c @@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type, void *data) { struct super_block *s = NULL; - struct hlist_node *node; struct super_block *old; int err; retry: spin_lock(&sb_lock); if (test) { - hlist_for_each_entry(old, node, &type->fs_supers, s_instances) { + hlist_for_each_entry(old, &type->fs_supers, s_instances) { if (!test(old, data)) continue; if (!grab_super(old)) @@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type, void (*f)(struct super_block *, void *), void *arg) { struct super_block *sb, *p = NULL; - struct hlist_node *node; spin_lock(&sb_lock); - hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) { + hlist_for_each_entry(sb, &type->fs_supers, s_instances) { sb->s_count++; spin_unlock(&sb_lock); @@ -842,7 +840,7 @@ int get_anon_bdev(dev_t *p) else if (error) return -EAGAIN; - if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { + if (dev == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); if (unnamed_dev_start > dev) diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 2ce9a5db6ab5..15c68f9489ae 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -461,14 +461,13 @@ const struct file_operations bin_fops = { void unmap_bin_file(struct sysfs_dirent *attr_sd) { struct bin_buffer *bb; - struct hlist_node *tmp; if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) return; mutex_lock(&sysfs_bin_lock); - hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { + hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) { struct inode *inode = file_inode(bb->file); unmap_mapping_range(inode->i_mapping, 0, 0, 1); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 96fcbb85ff83..d1dba7ce75ae 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1442,9 +1442,8 @@ xlog_recover_find_tid( xlog_tid_t tid) { xlog_recover_t *trans; - struct hlist_node *n; - hlist_for_each_entry(trans, n, head, r_list) { + hlist_for_each_entry(trans, head, r_list) { if (trans->r_log_tid == tid) return trans; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..78feda9bbae2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -19,6 +19,7 @@ #include <linux/gfp.h> #include <linux/bsg.h> #include <linux/smp.h> +#include <linux/rcupdate.h> #include <asm/scatterlist.h> @@ -437,6 +438,7 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif + struct rcu_head rcu_head; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -974,7 +976,6 @@ struct blk_plug { unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head cb_list; /* md requires an unplug callback */ - unsigned int should_sort; /* list to be sorted before flushing? */ }; #define BLK_MAX_REQUEST_COUNT 16 diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7c2e030e72f1..0ea61e07a91c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,6 +12,7 @@ struct blk_trace { int trace_state; + bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 458f497738a4..5afc4f94d110 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -#define touch_buffer(bh) mark_page_accessed(bh->b_page) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten) void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); +void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index dad579b0c0e6..76554cecaab2 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -12,16 +12,46 @@ #define CEPH_FEATURE_MONNAMES (1<<5) #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) -/* bits 8-17 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_OBJECTLOCATOR (1<<8) +#define CEPH_FEATURE_PGID64 (1<<9) +#define CEPH_FEATURE_INCSUBOSDMAP (1<<10) +#define CEPH_FEATURE_PGPOOL3 (1<<11) +#define CEPH_FEATURE_OSDREPLYMUX (1<<12) +#define CEPH_FEATURE_OSDENC (1<<13) +#define CEPH_FEATURE_OMAP (1<<14) +#define CEPH_FEATURE_MONENC (1<<15) +#define CEPH_FEATURE_QUERY_T (1<<16) +#define CEPH_FEATURE_INDEP_PG_MAP (1<<17) #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) +#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19) +#define CEPH_FEATURE_MON_NULLROUTE (1<<20) +#define CEPH_FEATURE_MON_GV (1<<21) +#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22) +#define CEPH_FEATURE_MSG_AUTH (1<<23) +#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) +#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) +#define CEPH_FEATURE_CREATEPOOLID (1<<26) +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_OSD_HBMSGS (1<<28) +#define CEPH_FEATURE_MDSENC (1<<29) +#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) /* * Features supported. */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ + CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_OSDHASHPSPOOL) #define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC) #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a76..2ad7b860f062 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -21,16 +21,14 @@ * internal cluster protocols separately from the public, * client-facing protocol. */ -#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ -#define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ -#define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -51,7 +49,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_unused; /* unused; used to be preferred primary (-1) */ + __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -101,6 +99,8 @@ struct ceph_dir_layout { #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 #define CEPH_MSG_AUTH 17 #define CEPH_MSG_AUTH_REPLY 18 +#define CEPH_MSG_MON_GET_VERSION 19 +#define CEPH_MSG_MON_GET_VERSION_REPLY 20 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 @@ -221,6 +221,11 @@ struct ceph_mon_subscribe_ack { } __attribute__ ((packed)); /* + * mdsmap flags + */ +#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ + +/* * mds states * > 0 -> in * <= 0 -> out @@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ +#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed @@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); #define CEPH_LOCK_IXATTR 2048 #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ /* client_session ops */ enum { @@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_SIZE 32 #define CEPH_SETATTR_CTIME 64 +/* + * Ceph setxattr request flags. + */ +#define CEPH_XATTR_CREATE 1 +#define CEPH_XATTR_REPLACE 2 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ +#define CEPH_CAP_SIMPLE_BITS 2 +#define CEPH_CAP_FILE_BITS 8 + /* per-lock shift */ #define CEPH_CAP_SAUTH 2 #define CEPH_CAP_SLINK 4 #define CEPH_CAP_SXATTR 6 #define CEPH_CAP_SFILE 8 -#define CEPH_CAP_SFLOCK 20 +#define CEPH_CAP_SFLOCK 20 -#define CEPH_CAP_BITS 22 +#define CEPH_CAP_BITS 22 /* composed values */ #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 63d092822bad..360d9d08ca9e 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n) return end >= *p && n <= end - *p; } -#define ceph_decode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_decode_64_safe(p, end, v, bad) \ @@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n) * * There are two possible failures: * - converting the string would require accessing memory at or - * beyond the "end" pointer provided (-E - * - memory could not be allocated for the result + * beyond the "end" pointer provided (-ERANGE) + * - memory could not be allocated for the result (-ENOMEM) */ static inline char *ceph_extract_encoded_string(void **p, void *end, size_t *lenp, gfp_t gfp) @@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end, *p += len; } -#define ceph_encode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_encode_64_safe(p, end, v, bad) \ @@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end, #define ceph_encode_32_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u32), bad); \ - ceph_encode_32(p, v); \ + ceph_encode_32(p, v); \ } while (0) #define ceph_encode_16_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u16), bad); \ - ceph_encode_16(p, v); \ + ceph_encode_16(p, v); \ + } while (0) +#define ceph_encode_8_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u8), bad); \ + ceph_encode_8(p, v); \ } while (0) #define ceph_encode_copy_safe(p, end, pv, n, bad) \ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b12..29818fc3fa49 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) } /* ceph_common.c */ +extern bool libceph_compatible(void *data); + extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern struct kmem_cache *ceph_inode_cachep; @@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_get_direct_page_vector(const char __user *data, +extern struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, @@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, +extern void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, +extern void ceph_copy_from_page_vector(struct page **pages, + void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, +extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index cb15b5d867c7..87ed09f54800 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -29,8 +29,8 @@ struct ceph_mdsmap { /* which object pools file data can be stored in */ int m_num_data_pg_pools; - u32 *m_data_pg_pools; - u32 m_cas_pg_pool; + u64 *m_data_pg_pools; + u64 m_cas_pg_pool; }; static inline struct ceph_entity_addr * diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a9..60903e0f665c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -83,9 +83,11 @@ struct ceph_msg { struct list_head list_head; struct kref kref; +#ifdef CONFIG_BLOCK struct bio *bio; /* instead of pages/pagelist */ struct bio *bio_iter; /* bio iterator */ int bio_seg; /* current bio segment */ +#endif /* CONFIG_BLOCK */ struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e6..1dd5d466b6f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,7 @@ #include <linux/ceph/osdmap.h> #include <linux/ceph/messenger.h> #include <linux/ceph/auth.h> +#include <linux/ceph/pagelist.h> /* * Maximum object name size @@ -22,7 +23,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -47,6 +47,9 @@ struct ceph_osd { struct list_head o_keepalive_item; }; + +#define CEPH_OSD_MAX_OP 10 + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ @@ -63,9 +66,23 @@ struct ceph_osd_request { struct ceph_connection *r_con_filling_msg; struct ceph_msg *r_request, *r_reply; - int r_result; int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ + int r_num_ops; + + /* encoded message content */ + struct ceph_osd_op *r_request_ops; + /* these are updated on each send */ + __le32 *r_request_osdmap_epoch; + __le32 *r_request_flags; + __le64 *r_request_pool; + void *r_request_pgid; + __le32 *r_request_attempts; + struct ceph_eversion *r_request_reassert_version; + + int r_result; + int r_reply_op_len[CEPH_OSD_MAX_OP]; + s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; @@ -82,6 +99,7 @@ struct ceph_osd_request { char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ int r_oid_len; + u64 r_snapid; unsigned long r_stamp; /* send OR check time */ struct ceph_file_layout r_file_layout; @@ -95,7 +113,7 @@ struct ceph_osd_request { struct bio *r_bio; /* instead of pages */ #endif - struct ceph_pagelist *r_trail; /* trailing part of the data */ + struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { @@ -107,7 +125,6 @@ struct ceph_osd_event { struct rb_node node; struct list_head osd_node; struct kref kref; - struct completion completion; }; struct ceph_osd_event_work { @@ -157,7 +174,7 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ - u32 flags; /* CEPH_OSD_FLAG_* */ + u32 payload_len; union { struct { u64 offset, length; @@ -166,23 +183,24 @@ struct ceph_osd_req_op { } extent; struct { const char *name; - u32 name_len; const char *val; + u32 name_len; u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } xattr; struct { const char *class_name; - __u8 class_len; const char *method_name; - __u8 method_len; - __u8 argc; const char *indata; u32 indata_len; + __u8 class_len; + __u8 method_len; + __u8 argc; } cls; struct { - u64 cookie, count; + u64 cookie; + u64 count; } pgls; struct { u64 snapid; @@ -190,12 +208,11 @@ struct ceph_osd_req_op { struct { u64 cookie; u64 ver; - __u8 flag; u32 prot_ver; u32 timeout; + __u8 flag; } watch; }; - u32 payload_len; }; extern int ceph_osdc_init(struct ceph_osd_client *osdc, @@ -207,29 +224,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); + gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, + unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); + u64 snap_id, + struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, @@ -239,8 +246,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align); + bool use_mempool, int page_align); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -279,17 +285,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int nr_pages, - int flags, int do_sync, bool nofail); + struct page **pages, int nr_pages); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent); + void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern int ceph_osdc_wait_event(struct ceph_osd_event *event, - unsigned long timeout); extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 10a417f9f76f..c819190d1642 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -18,14 +18,31 @@ * The map can be updated either via an incremental map (diff) describing * the change between two successive epochs, or as a fully encoded map. */ +struct ceph_pg { + uint64_t pool; + uint32_t seed; +}; + +#define CEPH_POOL_FLAG_HASHPSPOOL 1 + struct ceph_pg_pool_info { struct rb_node node; - int id; - struct ceph_pg_pool v; - int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + s64 id; + u8 type; + u8 size; + u8 crush_ruleset; + u8 object_hash; + u32 pg_num, pgp_num; + int pg_num_mask, pgp_num_mask; + u64 flags; char *name; }; +struct ceph_object_locator { + uint64_t pool; + char *key; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -110,15 +127,16 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -extern int ceph_calc_object_layout(struct ceph_object_layout *ol, +extern int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, + struct ceph_pg pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..68c96a508ac2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -9,14 +9,6 @@ #include <linux/ceph/msgr.h> /* - * osdmap encoding versions - */ -#define CEPH_OSDMAP_INC_VERSION 5 -#define CEPH_OSDMAP_INC_VERSION_EXT 6 -#define CEPH_OSDMAP_VERSION 5 -#define CEPH_OSDMAP_VERSION_EXT 6 - -/* * fs id */ struct ceph_fsid { @@ -64,7 +56,7 @@ struct ceph_timespec { * placement group. * we encode this into one __le64. */ -struct ceph_pg { +struct ceph_pg_v1 { __le16 preferred; /* preferred primary osd */ __le16 ps; /* placement seed */ __le32 pool; /* object pool */ @@ -91,21 +83,6 @@ struct ceph_pg { #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 -#define CEPH_PG_POOL_VERSION 2 -struct ceph_pg_pool { - __u8 type; /* CEPH_PG_TYPE_* */ - __u8 size; /* number of osds in each pg */ - __u8 crush_ruleset; /* crush placement rule */ - __u8 object_hash; /* hash mapping object name to ps */ - __le32 pg_num, pgp_num; /* number of pg's */ - __le32 lpg_num, lpgp_num; /* number of localized pg's */ - __le32 last_change; /* most recent epoch changed */ - __le64 snap_seq; /* seq for per-pool snapshot */ - __le32 snap_epoch; /* epoch of last snap */ - __le32 num_snaps; - __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ - __le64 auid; /* who owns the pg */ -} __attribute__ ((packed)); /* * stable_mod func is used to control number of placement groups. @@ -128,7 +105,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) * object layout - how a given object should be stored. */ struct ceph_object_layout { - struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */ __le32 ol_stripe_unit; /* for per-object parity, if any */ } __attribute__ ((packed)); @@ -145,8 +122,12 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS 1 -#define CEPH_OSD_UP 2 +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ + +extern const char *ceph_osd_state_name(int s); /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 @@ -161,9 +142,25 @@ struct ceph_eversion { #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ + +/* + * The error code to return when an OSD can't handle a write + * because it is too large. + */ +#define OSD_WRITETOOBIG EMSGSIZE /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. */ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -177,6 +174,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 #define CEPH_OSD_OP_TYPE_PG 0x0500 +#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ enum { /** data **/ @@ -217,6 +215,23 @@ enum { CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /* omap */ + CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, + CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, + CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, + CEPH_OSD_OP_OMAPGETVALSBYKEYS = + CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, + CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, + CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, + CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, + CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, + CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + + /** multi **/ + CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, + CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, + CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -238,6 +253,7 @@ enum { CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, + CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -248,10 +264,12 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ + /* note: the RD bit here is wrong; see special-case below in helper */ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, + CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, }; static inline int ceph_osd_op_type_lock(int op) @@ -274,6 +292,10 @@ static inline int ceph_osd_op_type_pg(int op) { return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; } +static inline int ceph_osd_op_type_multi(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; +} static inline int ceph_osd_op_mode_subop(int op) { @@ -281,11 +303,12 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; + return op & CEPH_OSD_OP_MODE_WR; } /* @@ -294,34 +317,38 @@ static inline int ceph_osd_op_mode_modify(int op) */ #define CEPH_OSD_TMAP_HDR 'h' #define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' /* create key */ #define CEPH_OSD_TMAP_RM 'r' +#define CEPH_OSD_TMAP_RMSLOPPY 'R' extern const char *ceph_osd_op_name(int op); - /* * osd op flags * * An op may be READ, WRITE, or READ|WRITE. */ enum { - CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ - CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ - CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ - CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_READ = 16, /* op may read */ - CEPH_OSD_FLAG_WRITE = 32, /* op may write */ - CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ - CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ - CEPH_OSD_FLAG_BALANCE_READS = 256, - CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ - CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ - CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ - CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ + CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ + CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ + CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 0x0100, + CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ + CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ + CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ }; enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ + CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -381,48 +408,13 @@ struct ceph_osd_op { __le64 ver; __u8 flag; /* 0 = unwatch, 1 = watch */ } __attribute__ ((packed)) watch; -}; + struct { + __le64 offset, length; + __le64 src_offset; + } __attribute__ ((packed)) clonerange; + }; __le32 payload_len; } __attribute__ ((packed)); -/* - * osd request message header. each request may include multiple - * ceph_osd_op object operations. - */ -struct ceph_osd_request_head { - __le32 client_inc; /* client incarnation */ - struct ceph_object_layout layout; /* pgid */ - __le32 osdmap_epoch; /* client's osdmap epoch */ - - __le32 flags; - - struct ceph_timespec mtime; /* for mutations only */ - struct ceph_eversion reassert_version; /* if we are replaying op */ - - __le32 object_len; /* length of object name */ - - __le64 snapid; /* snapid to read */ - __le64 snap_seq; /* writer's snap context */ - __le32 num_snaps; - - __le16 num_ops; - struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ -} __attribute__ ((packed)); - -struct ceph_osd_reply_head { - __le32 client_inc; /* client incarnation */ - __le32 flags; - struct ceph_object_layout layout; - __le32 osdmap_epoch; - struct ceph_eversion reassert_version; /* for replaying uncommitted */ - - __le32 result; /* result code */ - - __le32 object_len; /* length of object name */ - __le32 num_ops; - struct ceph_osd_op ops[0]; /* ops[], object */ -} __attribute__ ((packed)); - - #endif diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b5548..33f0280fd533 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x) } extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff7..6a1101f24cfb 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -162,6 +162,8 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; + /* attempt chooseleaf inner descent once; on failure retry outer descent */ + __u32 chooseleaf_descend_once; }; diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 3bd46f766751..a975de1ff59f 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(struct task_struct *task); +extern void debug_check_no_locks_held(void); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(struct task_struct *task) +debug_check_no_locks_held(void) { } #endif diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 186620631750..acd0312d46fb 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -2,6 +2,7 @@ #define _LINUX_ELEVATOR_H #include <linux/percpu.h> +#include <linux/hashtable.h> #ifdef CONFIG_BLOCK @@ -96,6 +97,8 @@ struct elevator_type struct list_head list; }; +#define ELV_HASH_BITS 6 + /* * each queue has an elevator_queue associated with it */ @@ -105,8 +108,8 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; - struct hlist_head *hash; unsigned int registered:1; + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 3c3ef19a625a..cf5d2af61b81 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -13,7 +13,7 @@ #include <linux/wait.h> /* - * CAREFUL: Check include/asm-generic/fcntl.h when defining + * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining * new flags, since they might collide with O_* ones. We want * to re-use O_* flags that couldn't possibly have a meaning * from eventfd, in order to leave a free define-space for diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e70df40d84f6..043a5cf8b5ba 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,6 +3,7 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED +#include <linux/debug_locks.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/atomic.h> @@ -48,6 +49,8 @@ extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { + if (!(current->flags & PF_NOFREEZE)) + debug_check_no_locks_held(); might_sleep(); if (likely(!freezing(current))) return false; diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h index 227c62424f3c..a9df51f5d54c 100644 --- a/include/linux/hashtable.h +++ b/include/linux/hashtable.h @@ -115,51 +115,50 @@ static inline void hash_del_rcu(struct hlist_node *node) * hash_for_each - iterate over a hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry(obj, node, &name[bkt], member) +#define hash_for_each(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry(obj, &name[bkt], member) /** * hash_for_each_rcu - iterate over a rcu enabled hashtable * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_rcu(name, bkt, node, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_rcu(obj, node, &name[bkt], member) +#define hash_for_each_rcu(name, bkt, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_rcu(obj, &name[bkt], member) /** * hash_for_each_safe - iterate over a hashtable safe against removal of * hash entry * @name: hashtable to iterate * @bkt: integer to use as bucket loop cursor - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @obj: the type * to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct */ -#define hash_for_each_safe(name, bkt, node, tmp, obj, member) \ - for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ - hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], member) +#define hash_for_each_safe(name, bkt, tmp, obj, member) \ + for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\ + (bkt)++)\ + hlist_for_each_entry_safe(obj, tmp, &name[bkt], member) /** * hash_for_each_possible - iterate over all possible objects hashing to the * same bucket * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible(name, obj, node, member, key) \ - hlist_for_each_entry(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member) /** * hash_for_each_possible_rcu - iterate over all possible objects hashing to the @@ -167,25 +166,24 @@ static inline void hash_del_rcu(struct hlist_node *node) * in a rcu enabled hashtable * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_rcu(name, obj, node, member, key) \ - hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member) +#define hash_for_each_possible_rcu(name, obj, member, key) \ + hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\ + member) /** * hash_for_each_possible_safe - iterate over all possible objects hashing to the * same bucket safe against removals * @name: hashtable to iterate * @obj: the type * to use as a loop cursor for each entry - * @node: the &struct list_head to use as a loop cursor for each entry * @tmp: a &struct used for temporary storage * @member: the name of the hlist_node within the struct * @key: the key of the objects to iterate over */ -#define hash_for_each_possible_safe(name, obj, node, tmp, member, key) \ - hlist_for_each_entry_safe(obj, node, tmp, \ +#define hash_for_each_possible_safe(name, obj, tmp, member, key) \ + hlist_for_each_entry_safe(obj, tmp,\ &name[hash_min(key, HASH_BITS(name))], member) diff --git a/include/linux/idr.h b/include/linux/idr.h index e5eb125effe6..a6f38b5c34e4 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -17,69 +17,40 @@ #include <linux/init.h> #include <linux/rcupdate.h> -#if BITS_PER_LONG == 32 -# define IDR_BITS 5 -# define IDR_FULL 0xfffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (5 bits * 7 levels = 35 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 30) -#elif BITS_PER_LONG == 64 -# define IDR_BITS 6 -# define IDR_FULL 0xfffffffffffffffful -/* We can only use two of the bits in the top level because there is - only one possible bit in the top level (6 bits * 6 levels = 36 - bits, but you only use 31 bits in the id). */ -# define TOP_LEVEL_FULL (IDR_FULL >> 62) -#else -# error "BITS_PER_LONG is not 32 or 64" -#endif - +/* + * We want shallower trees and thus more bits covered at each layer. 8 + * bits gives us large enough first layer for most use cases and maximum + * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and + * 1k on 32bit. + */ +#define IDR_BITS 8 #define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) -#define MAX_IDR_SHIFT (sizeof(int)*8 - 1) -#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) -#define MAX_IDR_MASK (MAX_IDR_BIT - 1) - -/* Leave the possibility of an incomplete final layer */ -#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) - -/* Number of id_layer structs to leave in free list */ -#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) - struct idr_layer { - unsigned long bitmap; /* A zero bit means "space here" */ + int prefix; /* the ID prefix of this idr_layer */ + DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */ struct idr_layer __rcu *ary[1<<IDR_BITS]; - int count; /* When zero, we can release it */ - int layer; /* distance from leaf */ - struct rcu_head rcu_head; + int count; /* When zero, we can release it */ + int layer; /* distance from leaf */ + struct rcu_head rcu_head; }; struct idr { - struct idr_layer __rcu *top; - struct idr_layer *id_free; - int layers; /* only valid without concurrent changes */ - int id_free_cnt; - spinlock_t lock; + struct idr_layer __rcu *hint; /* the last layer allocated from */ + struct idr_layer __rcu *top; + struct idr_layer *id_free; + int layers; /* only valid w/o concurrent changes */ + int id_free_cnt; + spinlock_t lock; }; -#define IDR_INIT(name) \ -{ \ - .top = NULL, \ - .id_free = NULL, \ - .layers = 0, \ - .id_free_cnt = 0, \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ +#define IDR_INIT(name) \ +{ \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ } #define DEFINE_IDR(name) struct idr name = IDR_INIT(name) -/* Actions to be taken after a call to _idr_sub_alloc */ -#define IDR_NEED_TO_GROW -2 -#define IDR_NOMORE_SPACE -3 - -#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC) - /** * DOC: idr sync * idr synchronization (stolen from radix-tree.h) @@ -101,19 +72,90 @@ struct idr { * This is what we export. */ -void *idr_find(struct idr *idp, int id); +void *idr_find_slowpath(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); +void idr_preload(gfp_t gfp_mask); +int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *idp, int *nextid); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); -void idr_remove_all(struct idr *idp); +void idr_free(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); +/** + * idr_preload_end - end preload section started with idr_preload() + * + * Each idr_preload() should be matched with an invocation of this + * function. See idr_preload() for details. + */ +static inline void idr_preload_end(void) +{ + preempt_enable(); +} + +/** + * idr_find - return pointer for given id + * @idp: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + */ +static inline void *idr_find(struct idr *idr, int id) +{ + struct idr_layer *hint = rcu_dereference_raw(idr->hint); + + if (hint && (id & ~IDR_MASK) == hint->prefix) + return rcu_dereference_raw(hint->ary[id & IDR_MASK]); + + return idr_find_slowpath(idr, id); +} + +/** + * idr_get_new - allocate new idr entry + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: pointer to the allocated handle + * + * Simple wrapper around idr_get_new_above() w/ @starting_id of zero. + */ +static inline int idr_get_new(struct idr *idp, void *ptr, int *id) +{ + return idr_get_new_above(idp, ptr, 0, id); +} + +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + +void __idr_remove_all(struct idr *idp); /* don't use */ + +/** + * idr_remove_all - remove all ids from the given idr tree + * @idp: idr handle + * + * If you're trying to destroy @idp, calling idr_destroy() is enough. + * This is going away. Don't use. + */ +static inline void __deprecated idr_remove_all(struct idr *idp) +{ + __idr_remove_all(idp); +} /* * IDA - IDR based id allocator, use when translation from id to @@ -141,7 +183,6 @@ struct ida { int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); -int ida_get_new(struct ida *ida, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); @@ -150,17 +191,18 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); -void __init idr_init_cache(void); - /** - * idr_for_each_entry - iterate over an idr's elements of a given type - * @idp: idr handle - * @entry: the type * to use as cursor - * @id: id entry's key + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Simple wrapper around ida_get_new_above() w/ @starting_id of zero. */ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ - entry != NULL; \ - ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) +static inline int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} + +void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 4648d8021244..cfd21e3d5506 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -216,11 +216,10 @@ static inline struct hlist_head *team_port_index_hash(struct team *team, static inline struct team_port *team_get_port_by_index(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry(port, p, head, hlist) + hlist_for_each_entry(port, head, hlist) if (port->index == port_index) return port; return NULL; @@ -228,11 +227,10 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline struct team_port *team_get_port_by_index_rcu(struct team *team, int port_index) { - struct hlist_node *p; struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); - hlist_for_each_entry_rcu(port, p, head, hlist) + hlist_for_each_entry_rcu(port, head, hlist) if (port->index == port_index) return port; return NULL; diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 1487e7906bbd..1f9f56e28851 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,10 +35,6 @@ #include <uapi/linux/ipmi.h> - -/* - * The in-kernel interface. - */ #include <linux/list.h> #include <linux/proc_fs.h> diff --git a/include/linux/list.h b/include/linux/list.h index cc6d2aa6b415..d991cc147c98 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -666,54 +666,49 @@ static inline void hlist_move_list(struct hlist_head *old, for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ pos = n) +#define hlist_entry_safe(ptr, type, member) \ + (ptr) ? hlist_entry(ptr, type, member) : NULL + /** * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_continue(pos, member) \ + for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) +#define hlist_for_each_entry_from(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @n: another &struct hlist_node to use as temporary storage * @head: the head for your list. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) #endif diff --git a/include/linux/llist.h b/include/linux/llist.h index d0ab98f73d38..a5199f6d0e82 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -125,31 +125,6 @@ static inline void init_llist_head(struct llist_head *list) (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** - * llist_for_each_entry_safe - iterate safely against remove over some entries - * of lock-less list of given type. - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as a temporary storage. - * @node: the fist entry of deleted list entries. - * @member: the name of the llist_node with the struct. - * - * In general, some entries of the lock-less list can be traversed - * safely only after being removed from list, so start with an entry - * instead of list head. This variant allows removal of entries - * as we iterate. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. - */ -#define llist_for_each_entry_safe(pos, n, node, member) \ - for ((pos) = llist_entry((node), typeof(*(pos)), member), \ - (n) = (pos)->member.next; \ - &(pos)->member != NULL; \ - (pos) = llist_entry(n, typeof(*(pos)), member), \ - (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) - -/** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0e62d84f9f7f..dcaad79f54ed 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -212,7 +212,8 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); /* diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 2a32b16f79cb..786bf6679a28 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -16,6 +16,7 @@ #include <linux/gpio.h> #include <linux/irqdomain.h> +#include <linux/pwm.h> #include <linux/regmap.h> #define LP8788_DEV_BUCK "lp8788-buck" @@ -124,11 +125,6 @@ enum lp8788_bl_ramp_step { LP8788_RAMP_65538us, }; -enum lp8788_bl_pwm_polarity { - LP8788_PWM_ACTIVE_HIGH, - LP8788_PWM_ACTIVE_LOW, -}; - enum lp8788_isink_scale { LP8788_ISINK_SCALE_100mA, LP8788_ISINK_SCALE_120mA, @@ -229,16 +225,6 @@ struct lp8788_charger_platform_data { }; /* - * struct lp8788_bl_pwm_data - * @pwm_set_intensity : set duty of pwm - * @pwm_get_intensity : get current duty of pwm - */ -struct lp8788_bl_pwm_data { - void (*pwm_set_intensity) (int brightness, int max_brightness); - int (*pwm_get_intensity) (int max_brightness); -}; - -/* * struct lp8788_backlight_platform_data * @name : backlight driver name. (default: "lcd-backlight") * @initial_brightness : initial value of backlight brightness @@ -248,8 +234,8 @@ struct lp8788_bl_pwm_data { * @rise_time : brightness ramp up step time * @fall_time : brightness ramp down step time * @pwm_pol : pwm polarity setting when bl_mode is pwm based - * @pwm_data : platform specific pwm generation functions - * only valid when bl_mode is pwm based + * @period_ns : platform specific pwm period value. unit is nano. + Only valid when bl_mode is LP8788_BL_COMB_PWM_BASED */ struct lp8788_backlight_platform_data { char *name; @@ -259,8 +245,8 @@ struct lp8788_backlight_platform_data { enum lp8788_bl_full_scale_current full_scale; enum lp8788_bl_ramp_step rise_time; enum lp8788_bl_ramp_step fall_time; - enum lp8788_bl_pwm_polarity pwm_pol; - struct lp8788_bl_pwm_data pwm_data; + enum pwm_polarity pwm_pol; + unsigned int period_ns; }; /* diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index fed3def62818..779cf7c4a3d1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -33,8 +33,7 @@ struct ieee1394_device_id { __u32 model_id; __u32 specifier_id; __u32 version; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; @@ -148,8 +147,7 @@ struct hid_device_id { __u16 group; __u32 vendor; __u32 product; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; /* s390 CCW devices */ @@ -173,8 +171,6 @@ struct ccw_device_id { struct ap_device_id { __u16 match_flags; /* which fields to match against */ __u8 dev_type; /* device type */ - __u8 pad1; - __u32 pad2; kernel_ulong_t driver_info; }; @@ -184,13 +180,10 @@ struct ap_device_id { struct css_device_id { __u8 match_flags; __u8 type; /* subchannel type */ - __u16 pad2; - __u32 pad3; kernel_ulong_t driver_data; }; -#define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ - /* to workaround crosscompile issues */ +#define ACPI_ID_LEN 9 struct acpi_device_id { __u8 id[ACPI_ID_LEN]; @@ -231,11 +224,7 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; -#ifdef __KERNEL__ const void *data; -#else - kernel_ulong_t data; -#endif }; /* VIO */ @@ -260,24 +249,14 @@ struct pcmcia_device_id { /* for pseudo multi-function devices */ __u8 device_no; - __u32 prod_id_hash[4] - __attribute__((aligned(sizeof(__u32)))); + __u32 prod_id_hash[4]; /* not matched against in kernelspace*/ -#ifdef __KERNEL__ const char * prod_id[4]; -#else - kernel_ulong_t prod_id[4] - __attribute__((aligned(sizeof(kernel_ulong_t)))); -#endif /* not matched against */ kernel_ulong_t driver_info; -#ifdef __KERNEL__ char * cisfile; -#else - kernel_ulong_t cisfile; -#endif }; #define PCMCIA_DEV_ID_MATCH_MANF_ID 0x0001 @@ -373,8 +352,7 @@ struct sdio_device_id { __u8 class; /* Standard interface or SDIO_ANY_ID */ __u16 vendor; /* Vendor or SDIO_ANY_ID */ __u16 device; /* Device ID or SDIO_ANY_ID */ - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* SSB core, see drivers/ssb/ */ @@ -420,8 +398,7 @@ struct virtio_device_id { */ struct hv_vmbus_device_id { __u8 guid[16]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* rpmsg */ @@ -440,8 +417,7 @@ struct rpmsg_device_id { struct i2c_device_id { char name[I2C_NAME_SIZE]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* spi */ @@ -451,8 +427,7 @@ struct i2c_device_id { struct spi_device_id { char name[SPI_NAME_SIZE]; - kernel_ulong_t driver_data /* Data private to the driver */ - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; /* Data private to the driver */ }; /* dmi */ @@ -484,15 +459,6 @@ struct dmi_strmatch { char substr[79]; }; -#ifndef __KERNEL__ -struct dmi_system_id { - kernel_ulong_t callback; - kernel_ulong_t ident; - struct dmi_strmatch matches[4]; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); -}; -#else struct dmi_system_id { int (*callback)(const struct dmi_system_id *); const char *ident; @@ -506,7 +472,6 @@ struct dmi_system_id { * error: storage size of '__mod_dmi_device_table' isn't known */ #define dmi_device_id dmi_system_id -#endif #define DMI_MATCH(a, b) { a, b } @@ -515,8 +480,7 @@ struct dmi_system_id { struct platform_device_id { char name[PLATFORM_NAME_SIZE]; - kernel_ulong_t driver_data - __attribute__((aligned(sizeof(kernel_ulong_t)))); + kernel_ulong_t driver_data; }; #define MDIO_MODULE_PREFIX "mdio:" @@ -572,11 +536,7 @@ struct isapnp_device_id { struct amba_id { unsigned int id; unsigned int mask; -#ifndef __KERNEL__ - kernel_ulong_t data; -#else void *data; -#endif }; /* diff --git a/include/linux/pid.h b/include/linux/pid.h index 2381c973d897..a089a3c447fc 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -176,9 +176,8 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ - struct hlist_node *pos___; \ if ((pid) != NULL) \ - hlist_for_each_entry_rcu((task), pos___, \ + hlist_for_each_entry_rcu((task), \ &(pid)->tasks[type], pids[type].node) { /* diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c92dd28eaa6c..8089e35d47ac 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -445,8 +445,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, /** * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -454,16 +453,16 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(hlist_next_rcu(pos))) +#define hlist_for_each_entry_rcu(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @@ -471,35 +470,36 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ - for (pos = rcu_dereference_bh((head)->first); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_rcu_bh(pos, head, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ - for (pos = rcu_dereference((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(pos->next)) +#define hlist_for_each_entry_continue_rcu(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. + * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ -#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ - for (pos = rcu_dereference_bh((pos)->next); \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_bh(pos->next)) +#define hlist_for_each_entry_continue_rcu_bh(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4bd6c06eb28e..2d8bdaef9611 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -231,6 +231,41 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist)) +/* + * sg page iterator + * + * Iterates over sg entries page-by-page. On each successful iteration, + * @piter->page points to the current page, @piter->sg to the sg holding this + * page and @piter->sg_pgoffset to the page's page offset within the sg. The + * iteration will stop either when a maximum number of sg entries was reached + * or a terminating sg (sg_last(sg) == true) was reached. + */ +struct sg_page_iter { + struct page *page; /* current page */ + struct scatterlist *sg; /* sg holding the page */ + unsigned int sg_pgoffset; /* page offset within the sg */ + + /* these are internal states, keep away */ + unsigned int __nents; /* remaining sg entries */ + int __pg_advance; /* nr pages to advance at the + * next step */ +}; + +bool __sg_page_iter_next(struct sg_page_iter *piter); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset); + +/** + * for_each_sg_page - iterate over the pages of the given sg list + * @sglist: sglist to iterate over + * @piter: page iterator to hold current page, sg, sg_pgoffset + * @nents: maximum number of sg entries to iterate over + * @pgoffset: starting page offset + */ +#define for_each_sg_page(sglist, piter, nents, pgoffset) \ + for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \ + __sg_page_iter_next(piter);) /* * Mapping sg iterator @@ -258,11 +293,11 @@ struct sg_mapping_iter { void *addr; /* pointer to the mapped area */ size_t length; /* length of the mapped area */ size_t consumed; /* number of consumed bytes */ + struct sg_page_iter piter; /* page iterator */ /* these are internal states, keep away */ - struct scatterlist *__sg; /* current entry */ - unsigned int __nents; /* nr of remaining entries */ - unsigned int __offset; /* offset within sg */ + unsigned int __offset; /* offset within page */ + unsigned int __remaining; /* remaining bytes on page */ unsigned int __flags; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 6853bf947fde..d35d2b6ddbfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,11 +346,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); -/* get/set_dumpable() values */ -#define SUID_DUMPABLE_DISABLED 0 -#define SUID_DUMPABLE_ENABLED 1 -#define SUID_DUMPABLE_SAFE 2 - /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h new file mode 100644 index 000000000000..07d8e53bedfc --- /dev/null +++ b/include/linux/sunrpc/addr.h @@ -0,0 +1,170 @@ +/* + * linux/include/linux/sunrpc/addr.h + * + * Various routines for copying and comparing sockaddrs and for + * converting them to and from presentation format. + */ +#ifndef _LINUX_SUNRPC_ADDR_H +#define _LINUX_SUNRPC_ADDR_H + +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <net/ipv6.h> + +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return false; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) + return sin1->sin6_scope_id == sin2->sin6_scope_id; + + return true; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; + return true; +} +#else /* !(IS_ENABLED(CONFIG_IPV6) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} +#endif /* !(IS_ENABLED(CONFIG_IPV6) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + +#endif /* _LINUX_SUNRPC_ADDR_H */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5dc9ee4d616e..303399b1ba59 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -83,6 +83,10 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + int (*cache_parse)(struct cache_detail *, char *buf, int len); @@ -157,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); extern void cache_clean_deferred(void *owner); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b84d8da..4a4abde000cb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -165,157 +165,5 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); -size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); - -static inline unsigned short rpc_get_port(const struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)sap)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - } - return 0; -} - -static inline void rpc_set_port(struct sockaddr *sap, - const unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - break; - } -} - -#define IPV6_SCOPE_DELIMITER '%' -#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") - -static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -static inline bool __rpc_copy_addr4(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in *ssin = (struct sockaddr_in *) src; - struct sockaddr_in *dsin = (struct sockaddr_in *) dst; - - dsin->sin_family = ssin->sin_family; - dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; - return true; -} - -#if IS_ENABLED(CONFIG_IPV6) -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - - if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) - return false; - else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) - return sin1->sin6_scope_id == sin2->sin6_scope_id; - - return true; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; - struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; - - dsin6->sin6_family = ssin6->sin6_family; - dsin6->sin6_addr = ssin6->sin6_addr; - return true; -} -#else /* !(IS_ENABLED(CONFIG_IPV6) */ -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return false; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - return false; -} -#endif /* !(IS_ENABLED(CONFIG_IPV6) */ - -/** - * rpc_cmp_addr - compare the address portion of two sockaddrs. - * @sap1: first sockaddr - * @sap2: second sockaddr - * - * Just compares the family and address portion. Ignores port, scope, etc. - * Returns true if the addrs are equal, false if they aren't. - */ -static inline bool rpc_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __rpc_cmp_addr4(sap1, sap2); - case AF_INET6: - return __rpc_cmp_addr6(sap1, sap2); - } - } - return false; -} - -/** - * rpc_copy_addr - copy the address portion of one sockaddr to another - * @dst: destination sockaddr - * @src: source sockaddr - * - * Just copies the address portion and family. Ignores port, scope, etc. - * Caller is responsible for making certain that dst is large enough to hold - * the address in src. Returns true if address family is supported. Returns - * false otherwise. - */ -static inline bool rpc_copy_addr(struct sockaddr *dst, - const struct sockaddr *src) -{ - switch (src->sa_family) { - case AF_INET: - return __rpc_copy_addr4(dst, src); - case AF_INET6: - return __rpc_copy_addr6(dst, src); - } - return false; -} - -/** - * rpc_get_scope_id - return scopeid for a given sockaddr - * @sa: sockaddr to get scopeid from - * - * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if - * not an AF_INET6 address. - */ -static inline u32 rpc_get_scope_id(const struct sockaddr *sa) -{ - if (sa->sa_family != AF_INET6) - return 0; - - return ((struct sockaddr_in6 *) sa)->sin6_scope_id; -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 676ddf53b3ee..1f0216b9a6c9 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,6 +50,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct svc_pool_stats sp_stats; /* statistics on pool operation */ + int sp_task_pending;/* has pending task */ } ____cacheline_aligned_in_smp; /* diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 63988990bd36..15f9204ee70b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -56,7 +56,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ - struct page ** pages; /* Array of contiguous pages */ + struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ @@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b82a83aba311..9a9367c0c076 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -87,9 +87,9 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason); -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr, - enum wb_reason reason); +int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); diff --git a/include/net/ax25.h b/include/net/ax25.h index 53539acbd81a..89ed9ac5701f 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -161,8 +161,8 @@ typedef struct ax25_uid_assoc { ax25_address call; } ax25_uid_assoc; -#define ax25_uid_for_each(__ax25, node, list) \ - hlist_for_each_entry(__ax25, node, list, uid_node) +#define ax25_uid_for_each(__ax25, list) \ + hlist_for_each_entry(__ax25, list, uid_node) #define ax25_uid_hold(ax25) \ atomic_inc(&((ax25)->refcount)) @@ -247,8 +247,8 @@ typedef struct ax25_cb { #define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo) -#define ax25_for_each(__ax25, node, list) \ - hlist_for_each_entry(__ax25, node, list, ax25_node) +#define ax25_for_each(__ax25, list) \ + hlist_for_each_entry(__ax25, list, ax25_node) #define ax25_cb_hold(__ax25) \ atomic_inc(&((__ax25)->refcount)) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 7b2ae9d37076..ef83d9e844b5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -94,8 +94,8 @@ static inline struct net *ib_net(struct inet_bind_bucket *ib) return read_pnet(&ib->ib_net); } -#define inet_bind_bucket_for_each(tb, pos, head) \ - hlist_for_each_entry(tb, pos, head, node) +#define inet_bind_bucket_for_each(tb, head) \ + hlist_for_each_entry(tb, head, node) struct inet_bind_hashbucket { spinlock_t lock; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 7d658d577368..f908dfc06505 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -178,11 +178,11 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) #define inet_twsk_for_each(tw, node, head) \ hlist_nulls_for_each_entry(tw, node, head, tw_node) -#define inet_twsk_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) +#define inet_twsk_for_each_inmate(tw, jail) \ + hlist_for_each_entry(tw, jail, tw_death_node) -#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) +#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \ + hlist_for_each_entry_safe(tw, safe, jail, tw_death_node) static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { diff --git a/include/net/netrom.h b/include/net/netrom.h index f0793c1cb5f8..121dcf854db5 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -154,17 +154,17 @@ static __inline__ void nr_node_unlock(struct nr_node *nr_node) nr_node_put(nr_node); } -#define nr_neigh_for_each(__nr_neigh, node, list) \ - hlist_for_each_entry(__nr_neigh, node, list, neigh_node) +#define nr_neigh_for_each(__nr_neigh, list) \ + hlist_for_each_entry(__nr_neigh, list, neigh_node) -#define nr_neigh_for_each_safe(__nr_neigh, node, node2, list) \ - hlist_for_each_entry_safe(__nr_neigh, node, node2, list, neigh_node) +#define nr_neigh_for_each_safe(__nr_neigh, node2, list) \ + hlist_for_each_entry_safe(__nr_neigh, node2, list, neigh_node) -#define nr_node_for_each(__nr_node, node, list) \ - hlist_for_each_entry(__nr_node, node, list, node_node) +#define nr_node_for_each(__nr_node, list) \ + hlist_for_each_entry(__nr_node, list, node_node) -#define nr_node_for_each_safe(__nr_node, node, node2, list) \ - hlist_for_each_entry_safe(__nr_node, node, node2, list, node_node) +#define nr_node_for_each_safe(__nr_node, node2, list) \ + hlist_for_each_entry_safe(__nr_node, node2, list, node_node) /*********************************************************************/ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2761c905504e..f10818fc8804 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -339,11 +339,10 @@ static inline struct Qdisc_class_common * qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) { struct Qdisc_class_common *cl; - struct hlist_node *n; unsigned int h; h = qdisc_class_hash(id, hash->hashmask); - hlist_for_each_entry(cl, n, &hash->hash[h], hnode) { + hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) return cl; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 7fdf298a47ef..df85a0c0f2d5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -675,8 +675,8 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) return h & (sctp_assoc_hashsize - 1); } -#define sctp_for_each_hentry(epb, node, head) \ - hlist_for_each_entry(epb, node, head, node) +#define sctp_for_each_hentry(epb, head) \ + hlist_for_each_entry(epb, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) diff --git a/include/net/sock.h b/include/net/sock.h index a66caa223d18..14f6e9d19dc7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -606,24 +606,23 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } -#define sk_for_each(__sk, node, list) \ - hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu(__sk, node, list) \ - hlist_for_each_entry_rcu(__sk, node, list, sk_node) +#define sk_for_each(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_node) +#define sk_for_each_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, sk_node) #define sk_nulls_for_each(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) #define sk_nulls_for_each_rcu(__sk, node, list) \ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) -#define sk_for_each_from(__sk, node) \ - if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ - hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_for_each_from(__sk) \ + hlist_for_each_entry_from(__sk, sk_node) #define sk_nulls_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) -#define sk_for_each_safe(__sk, node, tmp, list) \ - hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) -#define sk_for_each_bound(__sk, node, list) \ - hlist_for_each_entry(__sk, node, list, sk_bind_node) +#define sk_for_each_safe(__sk, tmp, list) \ + hlist_for_each_entry_safe(__sk, tmp, list, sk_node) +#define sk_for_each_bound(__sk, list) \ + hlist_for_each_entry(__sk, list, sk_bind_node) static inline struct user_namespace *sk_user_ns(struct sock *sk) { diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 05c5e61f0a7c..9961726523d0 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -6,10 +6,61 @@ #include <linux/blktrace_api.h> #include <linux/blkdev.h> +#include <linux/buffer_head.h> #include <linux/tracepoint.h> #define RWBS_LEN 8 +DECLARE_EVENT_CLASS(block_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh), + + TP_STRUCT__entry ( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( size_t, size ) + ), + + TP_fast_assign( + __entry->dev = bh->b_bdev->bd_dev; + __entry->sector = bh->b_blocknr; + __entry->size = bh->b_size; + ), + + TP_printk("%d,%d sector=%llu size=%zu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->sector, __entry->size + ) +); + +/** + * block_touch_buffer - mark a buffer accessed + * @bh: buffer_head being touched + * + * Called from touch_buffer(). + */ +DEFINE_EVENT(block_buffer, block_touch_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + +/** + * block_dirty_buffer - mark a buffer dirty + * @bh: buffer_head being dirtied + * + * Called from mark_buffer_dirty(). + */ +DEFINE_EVENT(block_buffer, block_dirty_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -206,7 +257,6 @@ TRACE_EVENT(block_bio_bounce, /** * block_bio_complete - completed all work on the block operation - * @q: queue holding the block operation * @bio: block operation completed * @error: io error value * @@ -215,9 +265,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio, int error), + TP_PROTO(struct bio *bio, int error), - TP_ARGS(q, bio, error), + TP_ARGS(bio, error), TP_STRUCT__entry( __field( dev_t, dev ) @@ -228,7 +278,8 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio->bi_bdev ? + bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; __entry->error = error; @@ -241,11 +292,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio, +DECLARE_EVENT_CLASS(block_bio_merge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, rq, bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -272,31 +323,33 @@ DECLARE_EVENT_CLASS(block_bio, /** * block_bio_backmerge - merging block operation to the end of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request * in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_backmerge, +DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block * operation in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_frontmerge, +DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** @@ -306,11 +359,32 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge, * * About to place the block IO operation @bio into queue @q. */ -DEFINE_EVENT(block_bio, block_bio_queue, +TRACE_EVENT(block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, RWBS_LEN ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); DECLARE_EVENT_CLASS(block_get_rq, diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b453d92c2253..6a16fd2e70ed 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -32,6 +32,115 @@ struct wb_writeback_work; +TRACE_EVENT(writeback_dirty_page, + + TP_PROTO(struct page *page, struct address_space *mapping), + + TP_ARGS(page, mapping), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(pgoff_t, index) + ), + + TP_fast_assign( + strncpy(__entry->name, + mapping ? dev_name(mapping->backing_dev_info->dev) : "(unknown)", 32); + __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->index = page->index; + ), + + TP_printk("bdi %s: ino=%lu index=%lu", + __entry->name, + __entry->ino, + __entry->index + ) +); + +DECLARE_EVENT_CLASS(writeback_dirty_inode_template, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(unsigned long, flags) + ), + + TP_fast_assign( + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + /* may be called for files on pseudo FSes w/ unregistered bdi */ + strncpy(__entry->name, + bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + __entry->ino = inode->i_ino; + __entry->flags = flags; + ), + + TP_printk("bdi %s: ino=%lu flags=%s", + __entry->name, + __entry->ino, + show_inode_state(__entry->flags) + ) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DECLARE_EVENT_CLASS(writeback_write_inode_template, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(int, sync_mode) + ), + + TP_fast_assign( + strncpy(__entry->name, + dev_name(inode->i_mapping->backing_dev_info->dev), 32); + __entry->ino = inode->i_ino; + __entry->sync_mode = wbc->sync_mode; + ), + + TP_printk("bdi %s: ino=%lu sync_mode=%d", + __entry->name, + __entry->ino, + __entry->sync_mode + ) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), TP_ARGS(bdi, work), @@ -479,6 +588,13 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ) ); +DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write), + TP_ARGS(inode, wbc, nr_to_write) +); + DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 33fbc99b3812..7b26a62e5707 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -59,15 +59,7 @@ * if it becomes full and it is queried once a second to see if * anything is in it. Incoming commands to the driver will get * delivered as commands. - * - * This driver provides two main interfaces: one for in-kernel - * applications and another for userland applications. The - * capabilities are basically the same for both interface, although - * the interfaces are somewhat different. The stuff in the - * #ifdef __KERNEL__ below is the in-kernel interface. The userland - * interface is defined later in the file. */ - - + */ /* * This is an overlay for all the address types, so it's easy to diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index 996719f82e28..f055e58b3147 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -87,6 +87,8 @@ #define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \ && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2) +#define FAT_STATE_DIRTY 0x01 + struct __fat_dirent { long d_ino; __kernel_off_t d_off; @@ -120,14 +122,34 @@ struct fat_boot_sector { __le32 hidden; /* hidden sectors (unused) */ __le32 total_sect; /* number of sectors (if sectors == 0) */ - /* The following fields are only used by FAT32 */ - __le32 fat32_length; /* sectors/FAT */ - __le16 flags; /* bit 8: fat mirroring, low 4: active fat */ - __u8 version[2]; /* major, minor filesystem version */ - __le32 root_cluster; /* first cluster in root directory */ - __le16 info_sector; /* filesystem info sector */ - __le16 backup_boot; /* backup boot sector */ - __le16 reserved2[6]; /* Unused */ + union { + struct { + /* Extended BPB Fields for FAT16 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat16; + + struct { + /* only used by FAT32 */ + __le32 length; /* sectors/FAT */ + __le16 flags; /* bit 8: fat mirroring, + low 4: active fat */ + __u8 version[2]; /* major, minor filesystem + version */ + __le32 root_cluster; /* first cluster in + root directory */ + __le16 info_sector; /* filesystem info sector */ + __le16 backup_boot; /* backup boot sector */ + __le16 reserved2[6]; /* Unused */ + /* Extended BPB Fields for FAT32 */ + __u8 drive_number; /* Physical drive number */ + __u8 state; /* undocumented, but used + for mount state. */ + /* other fiealds are not added here */ + } fat32; + }; }; struct fat_boot_fsinfo { diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index dfb514472cbc..4f52549b23ff 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -33,13 +33,14 @@ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, - /* there is a gap here to match userspace */ + NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4 }; /* values for flags field */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index 26607bd965fa..e4629b93bdd6 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -15,19 +15,22 @@ /* Namespaces */ #define XATTR_OS2_PREFIX "os2." -#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) +#define XATTR_OS2_PREFIX_LEN (sizeof(XATTR_OS2_PREFIX) - 1) + +#define XATTR_MAC_OSX_PREFIX "osx." +#define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1) #define XATTR_SECURITY_PREFIX "security." -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) +#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) #define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) +#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1) #define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) +#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1) #define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) +#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1) /* Security namespace */ #define XATTR_EVM_SUFFIX "evm" diff --git a/ipc/util.c b/ipc/util.c index 74e1d9c7a98a..464a8abd779f 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -252,7 +252,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) { kuid_t euid; kgid_t egid; - int id, err; + int id; int next_id = ids->next_id; if (size > IPCMNI) @@ -261,17 +261,21 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) if (ids->in_use >= size) return -ENOSPC; + idr_preload(GFP_KERNEL); + spin_lock_init(&new->lock); new->deleted = 0; rcu_read_lock(); spin_lock(&new->lock); - err = idr_get_new_above(&ids->ipcs_idr, new, - (next_id < 0) ? 0 : ipcid_to_idx(next_id), &id); - if (err) { + id = idr_alloc(&ids->ipcs_idr, new, + (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, + GFP_NOWAIT); + idr_preload_end(); + if (id < 0) { spin_unlock(&new->lock); rcu_read_unlock(); - return err; + return id; } ids->in_use++; @@ -307,19 +311,10 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params) { int err; -retry: - err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); - - if (!err) - return -ENOMEM; down_write(&ids->rw_mutex); err = ops->getnew(ns, params); up_write(&ids->rw_mutex); - - if (err == -EAGAIN) - goto retry; - return err; } @@ -376,8 +371,6 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, struct kern_ipc_perm *ipcp; int flg = params->flg; int err; -retry: - err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL); /* * Take the lock as a writer since we are potentially going to add @@ -389,8 +382,6 @@ retry: /* key not used */ if (!(flg & IPC_CREAT)) err = -ENOENT; - else if (!err) - err = -ENOMEM; else err = ops->getnew(ns, params); } else { @@ -413,9 +404,6 @@ retry: } up_write(&ids->rw_mutex); - if (err == -EAGAIN) - goto retry; - return err; } diff --git a/kernel/Makefile b/kernel/Makefile index eceac38f3c65..bbde5f1a4486 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ rcupdate.o extable.o params.o posix-timers.o \ - kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ + kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ async.o range.o groups.o lglock.o smpboot.o @@ -25,9 +25,7 @@ endif obj-y += sched/ obj-y += power/ -ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) -obj-$(CONFIG_X86) += kcmp.o -endif +obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -127,11 +125,19 @@ $(obj)/config_data.h: $(obj)/config_data.gz FORCE $(obj)/time.o: $(obj)/timeconst.h -quiet_cmd_timeconst = TIMEC $@ - cmd_timeconst = $(PERL) $< $(CONFIG_HZ) > $@ +quiet_cmd_hzfile = HZFILE $@ + cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@ + +targets += hz.bc +$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE + $(call if_changed,hzfile) + +quiet_cmd_bc = BC $@ + cmd_bc = bc -q $(filter-out FORCE,$^) > $@ + targets += timeconst.h -$(obj)/timeconst.h: $(src)/timeconst.pl FORCE - $(call if_changed,timeconst) +$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE + $(call if_changed,bc) ifeq ($(CONFIG_MODULE_SIG),y) # diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fb2fb11fbb25..a32f9432666c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -554,7 +554,6 @@ static struct css_set *find_existing_css_set( { int i; struct cgroupfs_root *root = cgrp->root; - struct hlist_node *node; struct css_set *cg; unsigned long key; @@ -577,7 +576,7 @@ static struct css_set *find_existing_css_set( } key = css_set_hash(template); - hash_for_each_possible(css_set_table, cg, node, hlist, key) { + hash_for_each_possible(css_set_table, cg, hlist, key) { if (!compare_css_sets(cg, oldcg, cgrp, template)) continue; @@ -1611,7 +1610,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, struct cgroupfs_root *existing_root; const struct cred *cred; int i; - struct hlist_node *node; struct css_set *cg; BUG_ON(sb->s_root != NULL); @@ -1666,7 +1664,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, /* Link the top cgroup in this hierarchy into all * the css_set objects */ write_lock(&css_set_lock); - hash_for_each(css_set_table, i, node, cg, hlist) + hash_for_each(css_set_table, i, cg, hlist) link_css_set(&tmp_cg_links, cg, root_cgrp); write_unlock(&css_set_lock); @@ -4493,7 +4491,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; int i, ret; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct css_set *cg; unsigned long key; @@ -4561,7 +4559,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) * this is all done under the css_set_lock. */ write_lock(&css_set_lock); - hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) { + hash_for_each_safe(css_set_table, i, tmp, cg, hlist) { /* skip entries that we already rehashed */ if (cg->subsys[ss->subsys_id]) continue; @@ -4571,7 +4569,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) cg->subsys[ss->subsys_id] = css; /* recompute hash and restore entry */ key = css_set_hash(cg->subsys); - hash_add(css_set_table, node, key); + hash_add(css_set_table, &cg->hlist, key); } write_unlock(&css_set_lock); @@ -4618,10 +4616,8 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) offline_css(ss, dummytop); ss->active = 0; - if (ss->use_id) { - idr_remove_all(&ss->idr); + if (ss->use_id) idr_destroy(&ss->idr); - } /* deassign the subsys_id */ subsys[ss->subsys_id] = NULL; @@ -5322,7 +5318,7 @@ EXPORT_SYMBOL_GPL(free_css_id); static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) { struct css_id *newid; - int myid, error, size; + int ret, size; BUG_ON(!ss->use_id); @@ -5330,35 +5326,24 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) newid = kzalloc(size, GFP_KERNEL); if (!newid) return ERR_PTR(-ENOMEM); - /* get id */ - if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) { - error = -ENOMEM; - goto err_out; - } + + idr_preload(GFP_KERNEL); spin_lock(&ss->id_lock); /* Don't use 0. allocates an ID of 1-65535 */ - error = idr_get_new_above(&ss->idr, newid, 1, &myid); + ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT); spin_unlock(&ss->id_lock); + idr_preload_end(); /* Returns error when there are no free spaces for new ID.*/ - if (error) { - error = -ENOSPC; + if (ret < 0) goto err_out; - } - if (myid > CSS_ID_MAX) - goto remove_idr; - newid->id = myid; + newid->id = ret; newid->depth = depth; return newid; -remove_idr: - error = -ENOSPC; - spin_lock(&ss->id_lock); - idr_remove(&ss->idr, myid); - spin_unlock(&ss->id_lock); err_out: kfree(newid); - return ERR_PTR(error); + return ERR_PTR(ret); } diff --git a/kernel/events/core.c b/kernel/events/core.c index ccc457e36354..b0cd86501c30 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5126,7 +5126,6 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, { struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct perf_event *event; - struct hlist_node *node; struct hlist_head *head; rcu_read_lock(); @@ -5134,7 +5133,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (!head) goto end; - hlist_for_each_entry_rcu(event, node, head, hlist_entry) { + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_event(event, nr, data, regs); } @@ -5419,7 +5418,6 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, { struct perf_sample_data data; struct perf_event *event; - struct hlist_node *node; struct perf_raw_record raw = { .size = entry_size, @@ -5429,7 +5427,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, perf_sample_data_init(&data, addr, 0); data.raw = &raw; - hlist_for_each_entry_rcu(event, node, head, hlist_entry) { + hlist_for_each_entry_rcu(event, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); } @@ -5965,13 +5963,9 @@ int perf_pmu_register(struct pmu *pmu, char *name, int type) pmu->name = name; if (type < 0) { - int err = idr_pre_get(&pmu_idr, GFP_KERNEL); - if (!err) - goto free_pdc; - - err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type); - if (err) { - ret = err; + type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL); + if (type < 0) { + ret = type; goto free_pdc; } } diff --git a/kernel/exit.c b/kernel/exit.c index 7dd20408707c..51e485ca9935 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -20,6 +20,7 @@ #include <linux/tsacct_kern.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/freezer.h> #include <linux/binfmts.h> #include <linux/nsproxy.h> #include <linux/pid_namespace.h> @@ -31,7 +32,6 @@ #include <linux/mempolicy.h> #include <linux/taskstats_kern.h> #include <linux/delayacct.h> -#include <linux/freezer.h> #include <linux/cgroup.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -485,7 +485,7 @@ static void exit_mm(struct task_struct * tsk) set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!self.task) /* see coredump_finish() */ break; - schedule(); + freezable_schedule(); } __set_task_state(tsk, TASK_RUNNING); down_read(&mm->mmap_sem); @@ -835,7 +835,7 @@ void do_exit(long code) /* * Make sure we are holding no locks: */ - debug_check_no_locks_held(tsk); + debug_check_no_locks_held(); /* * We can do this unlocked here. The futex code uses this flag * just to verify whether the pi state cleanup has been done diff --git a/kernel/fork.c b/kernel/fork.c index 8f62b2a0f120..8d932b1c9056 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1861,10 +1861,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) exit_sem(current); } - if (new_nsproxy) { + if (new_nsproxy) switch_task_namespaces(current, new_nsproxy); - new_nsproxy = NULL; - } task_lock(current); @@ -1894,9 +1892,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } } - if (new_nsproxy) - put_nsproxy(new_nsproxy); - bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); diff --git a/kernel/kexec.c b/kernel/kexec.c index 2436ffcec91f..bddd3d7a74b6 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -229,6 +229,8 @@ out: } +static void kimage_free_page_list(struct list_head *list); + static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, unsigned long nr_segments, struct kexec_segment __user *segments) @@ -242,8 +244,6 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, if (result) goto out; - *rimage = image; - /* * Find a location for the control code buffer, and add it * the vector of segments so that it's pages will also be @@ -254,22 +254,22 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry, get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); - goto out; + goto out_free; } image->swap_page = kimage_alloc_control_pages(image, 0); if (!image->swap_page) { printk(KERN_ERR "Could not allocate swap buffer\n"); - goto out; + goto out_free; } - result = 0; - out: - if (result == 0) - *rimage = image; - else - kfree(image); + *rimage = image; + return 0; +out_free: + kimage_free_page_list(&image->control_pages); + kfree(image); +out: return result; } @@ -316,7 +316,7 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ if ((mstart < crashk_res.start) || (mend > crashk_res.end)) - goto out; + goto out_free; } /* @@ -329,16 +329,15 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry, get_order(KEXEC_CONTROL_PAGE_SIZE)); if (!image->control_code_page) { printk(KERN_ERR "Could not allocate control_code_buffer\n"); - goto out; + goto out_free; } - result = 0; -out: - if (result == 0) - *rimage = image; - else - kfree(image); + *rimage = image; + return 0; +out_free: + kfree(image); +out: return result; } @@ -503,8 +502,6 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) break; - if (hole_end > crashk_res.end) - break; /* See if I overlap any of the segments */ for (i = 0; i < image->nr_segments; i++) { unsigned long mstart, mend; @@ -1514,6 +1511,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_OFFSET(page, _count); VMCOREINFO_OFFSET(page, mapping); VMCOREINFO_OFFSET(page, lru); + VMCOREINFO_OFFSET(page, _mapcount); + VMCOREINFO_OFFSET(page, private); VMCOREINFO_OFFSET(pglist_data, node_zones); VMCOREINFO_OFFSET(pglist_data, nr_zones); #ifdef CONFIG_FLAT_NODE_MEM_MAP @@ -1536,6 +1535,11 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_lru); VMCOREINFO_NUMBER(PG_private); VMCOREINFO_NUMBER(PG_swapcache); + VMCOREINFO_NUMBER(PG_slab); +#ifdef CONFIG_MEMORY_FAILURE + VMCOREINFO_NUMBER(PG_hwpoison); +#endif + VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); arch_crash_save_vmcoreinfo(); update_vmcoreinfo_note(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 550294d58a02..e35be53f6613 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -334,11 +334,10 @@ static inline void reset_kprobe_instance(void) struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { if (p->addr == addr) return p; } @@ -799,7 +798,6 @@ out: static void __kprobes optimize_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -810,7 +808,7 @@ static void __kprobes optimize_all_kprobes(void) kprobes_allow_optimization = true; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) if (!kprobe_disabled(p)) optimize_kprobe(p); } @@ -821,7 +819,6 @@ static void __kprobes optimize_all_kprobes(void) static void __kprobes unoptimize_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -832,7 +829,7 @@ static void __kprobes unoptimize_all_kprobes(void) kprobes_allow_optimization = false; for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { if (!kprobe_disabled(p)) unoptimize_kprobe(p, false); } @@ -1148,7 +1145,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) { struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long hash, flags = 0; if (unlikely(!kprobes_initialized)) @@ -1159,12 +1156,12 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; kretprobe_table_lock(hash, &flags); - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri, &empty_rp); } kretprobe_table_unlock(hash, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -1173,9 +1170,9 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) static inline void free_rp_inst(struct kretprobe *rp) { struct kretprobe_instance *ri; - struct hlist_node *pos, *next; + struct hlist_node *next; - hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) { + hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -1185,14 +1182,14 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp) { unsigned long flags, hash; struct kretprobe_instance *ri; - struct hlist_node *pos, *next; + struct hlist_node *next; struct hlist_head *head; /* No race here */ for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) { kretprobe_table_lock(hash, &flags); head = &kretprobe_inst_table[hash]; - hlist_for_each_entry_safe(ri, pos, next, head, hlist) { + hlist_for_each_entry_safe(ri, next, head, hlist) { if (ri->rp == rp) ri->rp = NULL; } @@ -2028,7 +2025,6 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb, { struct module *mod = data; struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; int checkcore = (val == MODULE_STATE_GOING); @@ -2045,7 +2041,7 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2192,7 +2188,6 @@ static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p, *kp; const char *sym = NULL; unsigned int i = *(loff_t *) v; @@ -2201,7 +2196,7 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) head = &kprobe_table[i]; preempt_disable(); - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { sym = kallsyms_lookup((unsigned long)p->addr, NULL, &offset, &modname, namebuf); if (kprobe_aggrprobe(p)) { @@ -2236,7 +2231,6 @@ static const struct file_operations debugfs_kprobes_operations = { static void __kprobes arm_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -2249,7 +2243,7 @@ static void __kprobes arm_all_kprobes(void) /* Arming kprobes doesn't optimize kprobe itself */ for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) if (!kprobe_disabled(p)) arm_kprobe(p); } @@ -2265,7 +2259,6 @@ already_enabled: static void __kprobes disarm_all_kprobes(void) { struct hlist_head *head; - struct hlist_node *node; struct kprobe *p; unsigned int i; @@ -2282,7 +2275,7 @@ static void __kprobes disarm_all_kprobes(void) for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, node, head, hlist) { + hlist_for_each_entry_rcu(p, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) disarm_kprobe(p, false); } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8a0efac4f99d..259db207b5d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4088,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -static void print_held_locks_bug(struct task_struct *curr) +static void print_held_locks_bug(void) { if (!debug_locks_off()) return; @@ -4097,22 +4097,21 @@ static void print_held_locks_bug(struct task_struct *curr) printk("\n"); printk("=====================================\n"); - printk("[ BUG: lock held at task exit time! ]\n"); + printk("[ BUG: %s/%d still has locks held! ]\n", + current->comm, task_pid_nr(current)); print_kernel_ident(); printk("-------------------------------------\n"); - printk("%s/%d is exiting with locks still held!\n", - curr->comm, task_pid_nr(curr)); - lockdep_print_held_locks(curr); - + lockdep_print_held_locks(current); printk("\nstack backtrace:\n"); dump_stack(); } -void debug_check_no_locks_held(struct task_struct *task) +void debug_check_no_locks_held(void) { - if (unlikely(task->lockdep_depth > 0)) - print_held_locks_bug(task); + if (unlikely(current->lockdep_depth > 0)) + print_held_locks_bug(); } +EXPORT_SYMBOL_GPL(debug_check_no_locks_held); void debug_show_all_locks(void) { diff --git a/kernel/pid.c b/kernel/pid.c index f2c6a6825098..047dc6264638 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -350,10 +350,9 @@ void disable_pid_allocation(struct pid_namespace *ns) struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { - struct hlist_node *elem; struct upid *pnr; - hlist_for_each_entry_rcu(pnr, elem, + hlist_for_each_entry_rcu(pnr, &pid_hash[pid_hashfn(nr, ns)], pid_chain) if (pnr->nr == nr && pnr->ns == ns) return container_of(pnr, struct pid, diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 7edfe4b901e7..6edbb2c55c22 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -552,24 +552,22 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, return -EAGAIN; spin_lock_init(&new_timer->it_lock); - retry: - if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) { - error = -EAGAIN; - goto out; - } + + idr_preload(GFP_KERNEL); spin_lock_irq(&idr_lock); - error = idr_get_new(&posix_timers_id, new_timer, &new_timer_id); + error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT); spin_unlock_irq(&idr_lock); - if (error) { - if (error == -EAGAIN) - goto retry; + idr_preload_end(); + if (error < 0) { /* * Weird looking, but we return EAGAIN if the IDR is * full (proper POSIX return value for this) */ - error = -EAGAIN; + if (error == -ENOSPC) + error = -EAGAIN; goto out; } + new_timer_id = error; it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2b5243176aba..7f12624a393c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1752,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister); static void fire_sched_in_preempt_notifiers(struct task_struct *curr) { struct preempt_notifier *notifier; - struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) notifier->ops->sched_in(notifier, raw_smp_processor_id()); } @@ -1763,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, struct task_struct *next) { struct preempt_notifier *notifier; - struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) notifier->ops->sched_out(notifier, next); } @@ -3260,7 +3258,8 @@ void complete_all(struct completion *x) EXPORT_SYMBOL(complete_all); static inline long __sched -do_wait_for_common(struct completion *x, long timeout, int state) +do_wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { if (!x->done) { DECLARE_WAITQUEUE(wait, current); @@ -3273,7 +3272,7 @@ do_wait_for_common(struct completion *x, long timeout, int state) } __set_current_state(state); spin_unlock_irq(&x->wait.lock); - timeout = schedule_timeout(timeout); + timeout = action(timeout); spin_lock_irq(&x->wait.lock); } while (!x->done && timeout); __remove_wait_queue(&x->wait, &wait); @@ -3284,17 +3283,30 @@ do_wait_for_common(struct completion *x, long timeout, int state) return timeout ?: 1; } -static long __sched -wait_for_common(struct completion *x, long timeout, int state) +static inline long __sched +__wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { might_sleep(); spin_lock_irq(&x->wait.lock); - timeout = do_wait_for_common(x, timeout, state); + timeout = do_wait_for_common(x, action, timeout, state); spin_unlock_irq(&x->wait.lock); return timeout; } +static long __sched +wait_for_common(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, schedule_timeout, timeout, state); +} + +static long __sched +wait_for_common_io(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, io_schedule_timeout, timeout, state); +} + /** * wait_for_completion: - waits for completion of a task * @x: holds the state of this particular completion @@ -3331,6 +3343,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) EXPORT_SYMBOL(wait_for_completion_timeout); /** + * wait_for_completion_io: - waits for completion of a task + * @x: holds the state of this particular completion + * + * This waits to be signaled for completion of a specific task. It is NOT + * interruptible and there is no timeout. The caller is accounted as waiting + * for IO. + */ +void __sched wait_for_completion_io(struct completion *x) +{ + wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io); + +/** + * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be signaled or for a + * specified timeout to expire. The timeout is in jiffies. It is not + * interruptible. The caller is accounted as waiting for IO. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. + */ +unsigned long __sched +wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) +{ + return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io_timeout); + +/** * wait_for_completion_interruptible: - waits for completion of a task (w/intr) * @x: holds the state of this particular completion * diff --git a/kernel/signal.c b/kernel/signal.c index 2a7ae2963185..2676aac4103d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1157,11 +1157,11 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, static void print_fatal_signal(int signr) { struct pt_regs *regs = signal_pt_regs(); - printk("%s/%d: potentially unexpected fatal signal %d.\n", + printk(KERN_INFO "%s/%d: potentially unexpected fatal signal %d.\n", current->comm, task_pid_nr(current), signr); #if defined(__i386__) && !defined(__arch_um__) - printk("code at %08lx: ", regs->ip); + printk(KERN_INFO "code at %08lx: ", regs->ip); { int i; for (i = 0; i < 16; i++) { @@ -1169,11 +1169,11 @@ static void print_fatal_signal(int signr) if (get_user(insn, (unsigned char *)(regs->ip + i))) break; - printk("%02x ", insn); + printk(KERN_CONT "%02x ", insn); } } + printk(KERN_CONT "\n"); #endif - printk("\n"); preempt_disable(); show_regs(regs); preempt_enable(); @@ -2996,7 +2996,8 @@ static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info) /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ - if (info->si_code >= 0 || info->si_code == SI_TKILL) { + if ((info->si_code >= 0 || info->si_code == SI_TKILL) && + (task_pid_vnr(current) != pid)) { /* We used to allow any < 0 si_code */ WARN_ON_ONCE(info->si_code < 0); return -EPERM; @@ -3045,7 +3046,8 @@ static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ - if (info->si_code >= 0 || info->si_code == SI_TKILL) { + if (((info->si_code >= 0 || info->si_code == SI_TKILL)) && + (task_pid_vnr(current) != pid)) { /* We used to allow any < 0 si_code */ WARN_ON_ONCE(info->si_code < 0); return -EPERM; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d4abac261779..b9bde5727829 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -131,7 +131,7 @@ static int smpboot_thread_fn(void *data) continue; } - BUG_ON(td->cpu != smp_processor_id()); + //BUG_ON(td->cpu != smp_processor_id()); /* Check for state change setup */ switch (td->status) { diff --git a/kernel/sys.c b/kernel/sys.c index e10566bee399..81f56445fba9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2185,11 +2185,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; -static void argv_cleanup(struct subprocess_info *info) -{ - argv_free(info->argv); -} - static int __orderly_poweroff(void) { int argc; @@ -2209,9 +2204,8 @@ static int __orderly_poweroff(void) } ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, - NULL, argv_cleanup, NULL); - if (ret == -ENOMEM) - argv_free(argv); + NULL, NULL, NULL); + argv_free(argv); return ret; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d8df00e69c14..d1b4ee67d2df 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2095,7 +2095,7 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, static void validate_coredump_safety(void) { #ifdef CONFIG_COREDUMP - if (suid_dumpable == SUID_DUMPABLE_SAFE && + if (suid_dumpable == SUID_DUMP_ROOT && core_pattern[0] != '/' && core_pattern[0] != '|') { printk(KERN_WARNING "Unsafe core_pattern used with "\ "suid_dumpable=2. Pipe handler or fully qualified "\ diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index b25115e8c7f3..ebf72358e86a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1171,9 +1171,10 @@ static ssize_t bin_dn_node_address(struct file *file, /* Convert the decnet address to binary */ result = -EIO; - nodep = strchr(buf, '.') + 1; + nodep = strchr(buf, '.'); if (!nodep) goto out; + ++nodep; area = simple_strtoul(buf, NULL, 10); node = simple_strtoul(nodep, NULL, 10); diff --git a/kernel/timeconst.bc b/kernel/timeconst.bc new file mode 100644 index 000000000000..511bdf2cafda --- /dev/null +++ b/kernel/timeconst.bc @@ -0,0 +1,108 @@ +scale=0 + +define gcd(a,b) { + auto t; + while (b) { + t = b; + b = a % b; + a = t; + } + return a; +} + +/* Division by reciprocal multiplication. */ +define fmul(b,n,d) { + return (2^b*n+d-1)/d; +} + +/* Adjustment factor when a ceiling value is used. Use as: + (imul * n) + (fmulxx * n + fadjxx) >> xx) */ +define fadj(b,n,d) { + auto v; + d = d/gcd(n,d); + v = 2^b*(d-1)/d; + return v; +} + +/* Compute the appropriate mul/adj values as well as a shift count, + which brings the mul value into the range 2^b-1 <= x < 2^b. Such + a shift value will be correct in the signed integer range and off + by at most one in the upper half of the unsigned range. */ +define fmuls(b,n,d) { + auto s, m; + for (s = 0; 1; s++) { + m = fmul(s,n,d); + if (m >= 2^(b-1)) + return s; + } + return 0; +} + +define timeconst(hz) { + print "/* Automatically generated by kernel/timeconst.bc */\n" + print "/* Time conversion constants for HZ == ", hz, " */\n" + print "\n" + + print "#ifndef KERNEL_TIMECONST_H\n" + print "#define KERNEL_TIMECONST_H\n\n" + + print "#include <linux/param.h>\n" + print "#include <linux/types.h>\n\n" + + print "#if HZ != ", hz, "\n" + print "#error \qkernel/timeconst.h has the wrong HZ value!\q\n" + print "#endif\n\n" + + if (hz < 2) { + print "#error Totally bogus HZ value!\n" + } else { + s=fmuls(32,1000,hz) + obase=16 + print "#define HZ_TO_MSEC_MUL32\tU64_C(0x", fmul(s,1000,hz), ")\n" + print "#define HZ_TO_MSEC_ADJ32\tU64_C(0x", fadj(s,1000,hz), ")\n" + obase=10 + print "#define HZ_TO_MSEC_SHR32\t", s, "\n" + + s=fmuls(32,hz,1000) + obase=16 + print "#define MSEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000), ")\n" + print "#define MSEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000), ")\n" + obase=10 + print "#define MSEC_TO_HZ_SHR32\t", s, "\n" + + obase=10 + cd=gcd(hz,1000) + print "#define HZ_TO_MSEC_NUM\t\t", 1000/cd, "\n" + print "#define HZ_TO_MSEC_DEN\t\t", hz/cd, "\n" + print "#define MSEC_TO_HZ_NUM\t\t", hz/cd, "\n" + print "#define MSEC_TO_HZ_DEN\t\t", 1000/cd, "\n" + print "\n" + + s=fmuls(32,1000000,hz) + obase=16 + print "#define HZ_TO_USEC_MUL32\tU64_C(0x", fmul(s,1000000,hz), ")\n" + print "#define HZ_TO_USEC_ADJ32\tU64_C(0x", fadj(s,1000000,hz), ")\n" + obase=10 + print "#define HZ_TO_USEC_SHR32\t", s, "\n" + + s=fmuls(32,hz,1000000) + obase=16 + print "#define USEC_TO_HZ_MUL32\tU64_C(0x", fmul(s,hz,1000000), ")\n" + print "#define USEC_TO_HZ_ADJ32\tU64_C(0x", fadj(s,hz,1000000), ")\n" + obase=10 + print "#define USEC_TO_HZ_SHR32\t", s, "\n" + + obase=10 + cd=gcd(hz,1000000) + print "#define HZ_TO_USEC_NUM\t\t", 1000000/cd, "\n" + print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n" + print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n" + print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n" + print "\n" + + print "#endif /* KERNEL_TIMECONST_H */\n" + } + halt +} + +timeconst(hz) diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl deleted file mode 100644 index 3f42652a6a37..000000000000 --- a/kernel/timeconst.pl +++ /dev/null @@ -1,376 +0,0 @@ -#!/usr/bin/perl -# ----------------------------------------------------------------------- -# -# Copyright 2007-2008 rPath, Inc. - All Rights Reserved -# -# This file is part of the Linux kernel, and is made available under -# the terms of the GNU General Public License version 2 or (at your -# option) any later version; incorporated herein by reference. -# -# ----------------------------------------------------------------------- -# - -# -# Usage: timeconst.pl HZ > timeconst.h -# - -# Precomputed values for systems without Math::BigInt -# Generated by: -# timeconst.pl --can 24 32 48 64 100 122 128 200 250 256 300 512 1000 1024 1200 -%canned_values = ( - 24 => [ - '0xa6aaaaab','0x2aaaaaa',26, - 125,3, - '0xc49ba5e4','0x1fbe76c8b4',37, - 3,125, - '0xa2c2aaab','0xaaaa',16, - 125000,3, - '0xc9539b89','0x7fffbce4217d',47, - 3,125000, - ], 32 => [ - '0xfa000000','0x6000000',27, - 125,4, - '0x83126e98','0xfdf3b645a',36, - 4,125, - '0xf4240000','0x0',17, - 31250,1, - '0x8637bd06','0x3fff79c842fa',46, - 1,31250, - ], 48 => [ - '0xa6aaaaab','0x6aaaaaa',27, - 125,6, - '0xc49ba5e4','0xfdf3b645a',36, - 6,125, - '0xa2c2aaab','0x15555',17, - 62500,3, - '0xc9539b89','0x3fffbce4217d',46, - 3,62500, - ], 64 => [ - '0xfa000000','0xe000000',28, - 125,8, - '0x83126e98','0x7ef9db22d',35, - 8,125, - '0xf4240000','0x0',18, - 15625,1, - '0x8637bd06','0x1fff79c842fa',45, - 1,15625, - ], 100 => [ - '0xa0000000','0x0',28, - 10,1, - '0xcccccccd','0x733333333',35, - 1,10, - '0x9c400000','0x0',18, - 10000,1, - '0xd1b71759','0x1fff2e48e8a7',45, - 1,10000, - ], 122 => [ - '0x8325c53f','0xfbcda3a',28, - 500,61, - '0xf9db22d1','0x7fbe76c8b',35, - 61,500, - '0x8012e2a0','0x3ef36',18, - 500000,61, - '0xffda4053','0x1ffffbce4217',45, - 61,500000, - ], 128 => [ - '0xfa000000','0x1e000000',29, - 125,16, - '0x83126e98','0x3f7ced916',34, - 16,125, - '0xf4240000','0x40000',19, - 15625,2, - '0x8637bd06','0xfffbce4217d',44, - 2,15625, - ], 200 => [ - '0xa0000000','0x0',29, - 5,1, - '0xcccccccd','0x333333333',34, - 1,5, - '0x9c400000','0x0',19, - 5000,1, - '0xd1b71759','0xfff2e48e8a7',44, - 1,5000, - ], 250 => [ - '0x80000000','0x0',29, - 4,1, - '0x80000000','0x180000000',33, - 1,4, - '0xfa000000','0x0',20, - 4000,1, - '0x83126e98','0x7ff7ced9168',43, - 1,4000, - ], 256 => [ - '0xfa000000','0x3e000000',30, - 125,32, - '0x83126e98','0x1fbe76c8b',33, - 32,125, - '0xf4240000','0xc0000',20, - 15625,4, - '0x8637bd06','0x7ffde7210be',43, - 4,15625, - ], 300 => [ - '0xd5555556','0x2aaaaaaa',30, - 10,3, - '0x9999999a','0x1cccccccc',33, - 3,10, - '0xd0555556','0xaaaaa',20, - 10000,3, - '0x9d495183','0x7ffcb923a29',43, - 3,10000, - ], 512 => [ - '0xfa000000','0x7e000000',31, - 125,64, - '0x83126e98','0xfdf3b645',32, - 64,125, - '0xf4240000','0x1c0000',21, - 15625,8, - '0x8637bd06','0x3ffef39085f',42, - 8,15625, - ], 1000 => [ - '0x80000000','0x0',31, - 1,1, - '0x80000000','0x0',31, - 1,1, - '0xfa000000','0x0',22, - 1000,1, - '0x83126e98','0x1ff7ced9168',41, - 1,1000, - ], 1024 => [ - '0xfa000000','0xfe000000',32, - 125,128, - '0x83126e98','0x7ef9db22',31, - 128,125, - '0xf4240000','0x3c0000',22, - 15625,16, - '0x8637bd06','0x1fff79c842f',41, - 16,15625, - ], 1200 => [ - '0xd5555556','0xd5555555',32, - 5,6, - '0x9999999a','0x66666666',31, - 6,5, - '0xd0555556','0x2aaaaa',22, - 2500,3, - '0x9d495183','0x1ffcb923a29',41, - 3,2500, - ] -); - -$has_bigint = eval 'use Math::BigInt qw(bgcd); 1;'; - -sub bint($) -{ - my($x) = @_; - return Math::BigInt->new($x); -} - -# -# Constants for division by reciprocal multiplication. -# (bits, numerator, denominator) -# -sub fmul($$$) -{ - my ($b,$n,$d) = @_; - - $n = bint($n); - $d = bint($d); - - return scalar (($n << $b)+$d-bint(1))/$d; -} - -sub fadj($$$) -{ - my($b,$n,$d) = @_; - - $n = bint($n); - $d = bint($d); - - $d = $d/bgcd($n, $d); - return scalar (($d-bint(1)) << $b)/$d; -} - -sub fmuls($$$) { - my($b,$n,$d) = @_; - my($s,$m); - my($thres) = bint(1) << ($b-1); - - $n = bint($n); - $d = bint($d); - - for ($s = 0; 1; $s++) { - $m = fmul($s,$n,$d); - return $s if ($m >= $thres); - } - return 0; -} - -# Generate a hex value if the result fits in 64 bits; -# otherwise skip. -sub bignum_hex($) { - my($x) = @_; - my $s = $x->as_hex(); - - return (length($s) > 18) ? undef : $s; -} - -# Provides mul, adj, and shr factors for a specific -# (bit, time, hz) combination -sub muladj($$$) { - my($b, $t, $hz) = @_; - my $s = fmuls($b, $t, $hz); - my $m = fmul($s, $t, $hz); - my $a = fadj($s, $t, $hz); - return (bignum_hex($m), bignum_hex($a), $s); -} - -# Provides numerator, denominator values -sub numden($$) { - my($n, $d) = @_; - my $g = bgcd($n, $d); - return ($n/$g, $d/$g); -} - -# All values for a specific (time, hz) combo -sub conversions($$) { - my ($t, $hz) = @_; - my @val = (); - - # HZ_TO_xx - push(@val, muladj(32, $t, $hz)); - push(@val, numden($t, $hz)); - - # xx_TO_HZ - push(@val, muladj(32, $hz, $t)); - push(@val, numden($hz, $t)); - - return @val; -} - -sub compute_values($) { - my($hz) = @_; - my @val = (); - my $s, $m, $a, $g; - - if (!$has_bigint) { - die "$0: HZ == $hz not canned and ". - "Math::BigInt not available\n"; - } - - # MSEC conversions - push(@val, conversions(1000, $hz)); - - # USEC conversions - push(@val, conversions(1000000, $hz)); - - return @val; -} - -sub outputval($$) -{ - my($name, $val) = @_; - my $csuf; - - if (defined($val)) { - if ($name !~ /SHR/) { - $val = "U64_C($val)"; - } - printf "#define %-23s %s\n", $name.$csuf, $val.$csuf; - } -} - -sub output($@) -{ - my($hz, @val) = @_; - my $pfx, $bit, $suf, $s, $m, $a; - - print "/* Automatically generated by kernel/timeconst.pl */\n"; - print "/* Conversion constants for HZ == $hz */\n"; - print "\n"; - print "#ifndef KERNEL_TIMECONST_H\n"; - print "#define KERNEL_TIMECONST_H\n"; - print "\n"; - - print "#include <linux/param.h>\n"; - print "#include <linux/types.h>\n"; - - print "\n"; - print "#if HZ != $hz\n"; - print "#error \"kernel/timeconst.h has the wrong HZ value!\"\n"; - print "#endif\n"; - print "\n"; - - foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', - 'HZ_TO_USEC','USEC_TO_HZ') { - foreach $bit (32) { - foreach $suf ('MUL', 'ADJ', 'SHR') { - outputval("${pfx}_$suf$bit", shift(@val)); - } - } - foreach $suf ('NUM', 'DEN') { - outputval("${pfx}_$suf", shift(@val)); - } - } - - print "\n"; - print "#endif /* KERNEL_TIMECONST_H */\n"; -} - -# Pretty-print Perl values -sub perlvals(@) { - my $v; - my @l = (); - - foreach $v (@_) { - if (!defined($v)) { - push(@l, 'undef'); - } elsif ($v =~ /^0x/) { - push(@l, "\'".$v."\'"); - } else { - push(@l, $v.''); - } - } - return join(',', @l); -} - -($hz) = @ARGV; - -# Use this to generate the %canned_values structure -if ($hz eq '--can') { - shift(@ARGV); - @hzlist = sort {$a <=> $b} (@ARGV); - - print "# Precomputed values for systems without Math::BigInt\n"; - print "# Generated by:\n"; - print "# timeconst.pl --can ", join(' ', @hzlist), "\n"; - print "\%canned_values = (\n"; - my $pf = "\t"; - foreach $hz (@hzlist) { - my @values = compute_values($hz); - print "$pf$hz => [\n"; - while (scalar(@values)) { - my $bit; - foreach $bit (32) { - my $m = shift(@values); - my $a = shift(@values); - my $s = shift(@values); - print "\t\t", perlvals($m,$a,$s), ",\n"; - } - my $n = shift(@values); - my $d = shift(@values); - print "\t\t", perlvals($n,$d), ",\n"; - } - print "\t]"; - $pf = ', '; - } - print "\n);\n"; -} else { - $hz += 0; # Force to number - if ($hz < 1) { - die "Usage: $0 HZ\n"; - } - - $cv = $canned_values{$hz}; - @val = defined($cv) ? @$cv : compute_values($hz); - output($hz, @val); -} -exit 0; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 71259e2b6b61..9e5b8c272eec 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -739,6 +739,12 @@ static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, struct request *rq) { + struct blk_trace *bt = q->blk_trace; + + /* if control ever passes through here, it's a request based driver */ + if (unlikely(bt && !bt->rq_based)) + bt->rq_based = true; + blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); } @@ -774,15 +780,30 @@ static void blk_add_trace_bio_bounce(void *ignore, blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } -static void blk_add_trace_bio_complete(void *ignore, - struct request_queue *q, struct bio *bio, - int error) +static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) { + struct request_queue *q; + struct blk_trace *bt; + + if (!bio->bi_bdev) + return; + + q = bdev_get_queue(bio->bi_bdev); + bt = q->blk_trace; + + /* + * Request based drivers will generate both rq and bio completions. + * Ignore bio ones. + */ + if (likely(!bt) || bt->rq_based) + return; + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, struct request_queue *q, + struct request *rq, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); @@ -790,6 +811,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, static void blk_add_trace_bio_frontmerge(void *ignore, struct request_queue *q, + struct request *rq, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 98ca94a41819..ab25b88aae56 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -762,7 +762,6 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) { struct ftrace_profile *rec; struct hlist_head *hhd; - struct hlist_node *n; unsigned long key; key = hash_long(ip, ftrace_profile_bits); @@ -771,7 +770,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) if (hlist_empty(hhd)) return NULL; - hlist_for_each_entry_rcu(rec, n, hhd, node) { + hlist_for_each_entry_rcu(rec, hhd, node) { if (rec->ip == ip) return rec; } @@ -1133,7 +1132,6 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) unsigned long key; struct ftrace_func_entry *entry; struct hlist_head *hhd; - struct hlist_node *n; if (ftrace_hash_empty(hash)) return NULL; @@ -1145,7 +1143,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) hhd = &hash->buckets[key]; - hlist_for_each_entry_rcu(entry, n, hhd, hlist) { + hlist_for_each_entry_rcu(entry, hhd, hlist) { if (entry->ip == ip) return entry; } @@ -1202,7 +1200,7 @@ remove_hash_entry(struct ftrace_hash *hash, static void ftrace_hash_clear(struct ftrace_hash *hash) { struct hlist_head *hhd; - struct hlist_node *tp, *tn; + struct hlist_node *tn; struct ftrace_func_entry *entry; int size = 1 << hash->size_bits; int i; @@ -1212,7 +1210,7 @@ static void ftrace_hash_clear(struct ftrace_hash *hash) for (i = 0; i < size; i++) { hhd = &hash->buckets[i]; - hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) + hlist_for_each_entry_safe(entry, tn, hhd, hlist) free_hash_entry(hash, entry); } FTRACE_WARN_ON(hash->count); @@ -1275,7 +1273,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) { struct ftrace_func_entry *entry; struct ftrace_hash *new_hash; - struct hlist_node *tp; int size; int ret; int i; @@ -1290,7 +1287,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) size = 1 << hash->size_bits; for (i = 0; i < size; i++) { - hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { ret = add_hash_entry(new_hash, entry->ip); if (ret < 0) goto free_hash; @@ -1316,7 +1313,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, struct ftrace_hash **dst, struct ftrace_hash *src) { struct ftrace_func_entry *entry; - struct hlist_node *tp, *tn; + struct hlist_node *tn; struct hlist_head *hhd; struct ftrace_hash *old_hash; struct ftrace_hash *new_hash; @@ -1362,7 +1359,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, size = 1 << src->size_bits; for (i = 0; i < size; i++) { hhd = &src->buckets[i]; - hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) { + hlist_for_each_entry_safe(entry, tn, hhd, hlist) { if (bits > 0) key = hash_long(entry->ip, bits); else @@ -2901,7 +2898,6 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, { struct ftrace_func_probe *entry; struct hlist_head *hhd; - struct hlist_node *n; unsigned long key; key = hash_long(ip, FTRACE_HASH_BITS); @@ -2917,7 +2913,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, * on the hash. rcu_read_lock is too dangerous here. */ preempt_disable_notrace(); - hlist_for_each_entry_rcu(entry, n, hhd, node) { + hlist_for_each_entry_rcu(entry, hhd, node) { if (entry->ip == ip) entry->ops->func(ip, parent_ip, &entry->data); } @@ -3068,7 +3064,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data, int flags) { struct ftrace_func_probe *entry; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; char str[KSYM_SYMBOL_LEN]; int type = MATCH_FULL; int i, len = 0; @@ -3091,7 +3087,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { struct hlist_head *hhd = &ftrace_func_hash[i]; - hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { + hlist_for_each_entry_safe(entry, tmp, hhd, node) { /* break up if statements for readability */ if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 194d79602dc7..697e88d13907 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -739,12 +739,11 @@ static int task_state_char(unsigned long state) struct trace_event *ftrace_find_event(int type) { struct trace_event *event; - struct hlist_node *n; unsigned key; key = type & (EVENT_HASHSIZE - 1); - hlist_for_each_entry(event, n, &event_hash[key], node) { + hlist_for_each_entry(event, &event_hash[key], node) { if (event->type == type) return event; } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index d96ba22dabfa..0c05a4592047 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -192,12 +192,11 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, static struct tracepoint_entry *get_tracepoint(const char *name) { struct hlist_head *head; - struct hlist_node *node; struct tracepoint_entry *e; u32 hash = jhash(name, strlen(name), 0); head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { + hlist_for_each_entry(e, head, hlist) { if (!strcmp(name, e->name)) return e; } @@ -211,13 +210,12 @@ static struct tracepoint_entry *get_tracepoint(const char *name) static struct tracepoint_entry *add_tracepoint(const char *name) { struct hlist_head *head; - struct hlist_node *node; struct tracepoint_entry *e; size_t name_len = strlen(name) + 1; u32 hash = jhash(name, name_len-1, 0); head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; - hlist_for_each_entry(e, node, head, hlist) { + hlist_for_each_entry(e, head, hlist) { if (!strcmp(name, e->name)) { printk(KERN_NOTICE "tracepoint %s busy\n", name); diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 1744bb80f1fb..394f70b17162 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -34,11 +34,11 @@ EXPORT_SYMBOL_GPL(user_return_notifier_unregister); void fire_user_return_notifiers(void) { struct user_return_notifier *urn; - struct hlist_node *tmp1, *tmp2; + struct hlist_node *tmp2; struct hlist_head *head; head = &get_cpu_var(return_notifier_list); - hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link) + hlist_for_each_entry_safe(urn, tmp2, head, link) urn->on_user_return(urn); put_cpu_var(return_notifier_list); } diff --git a/kernel/user.c b/kernel/user.c index 57ebfd42023c..e81978e8c03b 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -105,9 +105,8 @@ static void uid_hash_remove(struct user_struct *up) static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent) { struct user_struct *user; - struct hlist_node *h; - hlist_for_each_entry(user, h, hashent, uidhash_node) { + hlist_for_each_entry(user, hashent, uidhash_node) { if (uid_eq(user->uid, uid)) { atomic_inc(&user->__count); return user; diff --git a/kernel/utsname.c b/kernel/utsname.c index 08b197e8c485..a47fc5de3113 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -30,7 +30,7 @@ static struct uts_namespace *create_uts_ns(void) /* * Clone a new ns copying an original utsname, setting refcount to 1 * @old_ns: namespace to clone - * Return NULL on error (failure to kmalloc), new ns otherwise + * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise */ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, struct uts_namespace *old_ns) diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 63da38c2d820..4f69f9a5e221 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -15,6 +15,8 @@ #include <linux/sysctl.h> #include <linux/wait.h> +#ifdef CONFIG_PROC_SYSCTL + static void *get_uts(ctl_table *table, int write) { char *which = table->data; @@ -38,7 +40,6 @@ static void put_uts(ctl_table *table, int write, void *which) up_write(&uts_sem); } -#ifdef CONFIG_PROC_SYSCTL /* * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f4feacad3812..81f2457811eb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -251,8 +251,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); for ((pool) = &std_worker_pools(cpu)[0]; \ (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) -#define for_each_busy_worker(worker, i, pos, pool) \ - hash_for_each(pool->busy_hash, i, pos, worker, hentry) +#define for_each_busy_worker(worker, i, pool) \ + hash_for_each(pool->busy_hash, i, worker, hentry) static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, unsigned int sw) @@ -909,9 +909,8 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, struct work_struct *work) { struct worker *worker; - struct hlist_node *tmp; - hash_for_each_possible(pool->busy_hash, worker, tmp, hentry, + hash_for_each_possible(pool->busy_hash, worker, hentry, (unsigned long)work) if (worker->current_work == work && worker->current_func == work->func) @@ -1626,7 +1625,6 @@ static void busy_worker_rebind_fn(struct work_struct *work) static void rebind_workers(struct worker_pool *pool) { struct worker *worker, *n; - struct hlist_node *pos; int i; lockdep_assert_held(&pool->assoc_mutex); @@ -1648,7 +1646,7 @@ static void rebind_workers(struct worker_pool *pool) } /* rebind busy workers */ - for_each_busy_worker(worker, i, pos, pool) { + for_each_busy_worker(worker, i, pool) { struct work_struct *rebind_work = &worker->rebind_work; struct workqueue_struct *wq; @@ -3423,7 +3421,6 @@ static void wq_unbind_fn(struct work_struct *work) int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; - struct hlist_node *pos; int i; for_each_std_worker_pool(pool, cpu) { @@ -3442,7 +3439,7 @@ static void wq_unbind_fn(struct work_struct *work) list_for_each_entry(worker, &pool->idle_list, entry) worker->flags |= WORKER_UNBOUND; - for_each_busy_worker(worker, i, pos, pool) + for_each_busy_worker(worker, i, pool) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; diff --git a/lib/Makefile b/lib/Makefile index 02ed6c04cd7d..d7946ff75b2e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,7 +23,7 @@ lib-y += kobject.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o + bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/debugobjects.c b/lib/debugobjects.c index d11808ca4bc4..37061ede8b81 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -109,11 +109,10 @@ static void fill_pool(void) */ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) { - struct hlist_node *node; struct debug_obj *obj; int cnt = 0; - hlist_for_each_entry(obj, node, &b->list, node) { + hlist_for_each_entry(obj, &b->list, node) { cnt++; if (obj->object == addr) return obj; @@ -213,7 +212,7 @@ static void free_object(struct debug_obj *obj) static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; HLIST_HEAD(freelist); struct debug_obj *obj; unsigned long flags; @@ -227,7 +226,7 @@ static void debug_objects_oom(void) raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ - hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_for_each_entry_safe(obj, tmp, &freelist, node) { hlist_del(&obj->node); free_object(obj); } @@ -658,7 +657,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr, static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; HLIST_HEAD(freelist); struct debug_obj_descr *descr; enum debug_obj_state state; @@ -678,7 +677,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) repeat: cnt = 0; raw_spin_lock_irqsave(&db->lock, flags); - hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { + hlist_for_each_entry_safe(obj, tmp, &db->list, node) { cnt++; oaddr = (unsigned long) obj->object; if (oaddr < saddr || oaddr >= eaddr) @@ -702,7 +701,7 @@ repeat: raw_spin_unlock_irqrestore(&db->lock, flags); /* Now free them */ - hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_for_each_entry_safe(obj, tmp, &freelist, node) { hlist_del(&obj->node); free_object(obj); } @@ -1013,7 +1012,7 @@ void __init debug_objects_early_init(void) static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct debug_obj *obj, *new; HLIST_HEAD(objects); int i, cnt = 0; @@ -1033,7 +1032,7 @@ static int __init debug_objects_replace_static_objects(void) local_irq_disable(); /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node) + hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) hlist_del(&obj->node); /* Move the allocated objects to the pool */ hlist_move_list(&objects, &obj_pool); @@ -1042,7 +1041,7 @@ static int __init debug_objects_replace_static_objects(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); - hlist_for_each_entry(obj, node, &objects, node) { + hlist_for_each_entry(obj, &objects, node) { new = hlist_entry(obj_pool.first, typeof(*obj), node); hlist_del(&new->node); /* copy object data */ @@ -1057,7 +1056,7 @@ static int __init debug_objects_replace_static_objects(void) obj_pool_used); return 0; free: - hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { + hlist_for_each_entry_safe(obj, tmp, &objects, node) { hlist_del(&obj->node); kmem_cache_free(obj_cache, obj); } diff --git a/lib/devres.c b/lib/devres.c index 88ad75952a76..823533138fa0 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -227,6 +227,7 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr) devm_ioport_map_match, (void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); +#endif /* CONFIG_HAS_IOPORT */ #ifdef CONFIG_PCI /* @@ -432,4 +433,3 @@ void pcim_iounmap_regions(struct pci_dev *pdev, int mask) } EXPORT_SYMBOL(pcim_iounmap_regions); #endif /* CONFIG_PCI */ -#endif /* CONFIG_HAS_IOPORT */ diff --git a/lib/idr.c b/lib/idr.c index 648239079dd2..73f4d53c02f3 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -35,10 +35,41 @@ #include <linux/string.h> #include <linux/idr.h> #include <linux/spinlock.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> + +#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) +#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) + +/* Leave the possibility of an incomplete final layer */ +#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) + +/* Number of id_layer structs to leave in free list */ +#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) static struct kmem_cache *idr_layer_cache; +static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); +static DEFINE_PER_CPU(int, idr_preload_cnt); static DEFINE_SPINLOCK(simple_ida_lock); +/* the maximum ID which can be allocated given idr->layers */ +static int idr_max(int layers) +{ + int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT); + + return (1 << bits) - 1; +} + +/* + * Prefix mask for an idr_layer at @layer. For layer 0, the prefix mask is + * all bits except for the lower IDR_BITS. For layer 1, 2 * IDR_BITS, and + * so on. + */ +static int idr_layer_prefix_mask(int layer) +{ + return ~idr_max(layer + 1); +} + static struct idr_layer *get_from_free_list(struct idr *idp) { struct idr_layer *p; @@ -54,6 +85,50 @@ static struct idr_layer *get_from_free_list(struct idr *idp) return(p); } +/** + * idr_layer_alloc - allocate a new idr_layer + * @gfp_mask: allocation mask + * @layer_idr: optional idr to allocate from + * + * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch + * one from the per-cpu preload buffer. If @layer_idr is not %NULL, fetch + * an idr_layer from @idr->id_free. + * + * @layer_idr is to maintain backward compatibility with the old alloc + * interface - idr_pre_get() and idr_get_new*() - and will be removed + * together with per-pool preload buffer. + */ +static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) +{ + struct idr_layer *new; + + /* this is the old path, bypass to get_from_free_list() */ + if (layer_idr) + return get_from_free_list(layer_idr); + + /* try to allocate directly from kmem_cache */ + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + if (new) + return new; + + /* + * Try to fetch one from the per-cpu preload buffer if in process + * context. See idr_preload() for details. + */ + if (in_interrupt()) + return NULL; + + preempt_disable(); + new = __this_cpu_read(idr_preload_head); + if (new) { + __this_cpu_write(idr_preload_head, new->ary[0]); + __this_cpu_dec(idr_preload_cnt); + new->ary[0] = NULL; + } + preempt_enable(); + return new; +} + static void idr_layer_rcu_free(struct rcu_head *head) { struct idr_layer *layer; @@ -62,8 +137,10 @@ static void idr_layer_rcu_free(struct rcu_head *head) kmem_cache_free(idr_layer_cache, layer); } -static inline void free_layer(struct idr_layer *p) +static inline void free_layer(struct idr *idr, struct idr_layer *p) { + if (idr->hint && idr->hint == p) + RCU_INIT_POINTER(idr->hint, NULL); call_rcu(&p->rcu_head, idr_layer_rcu_free); } @@ -92,18 +169,18 @@ static void idr_mark_full(struct idr_layer **pa, int id) struct idr_layer *p = pa[0]; int l = 0; - __set_bit(id & IDR_MASK, &p->bitmap); + __set_bit(id & IDR_MASK, p->bitmap); /* * If this layer is full mark the bit in the layer above to * show that this part of the radix tree is full. This may * complete the layer above and require walking up the radix * tree. */ - while (p->bitmap == IDR_FULL) { + while (bitmap_full(p->bitmap, IDR_SIZE)) { if (!(p = pa[++l])) break; id = id >> IDR_BITS; - __set_bit((id & IDR_MASK), &p->bitmap); + __set_bit((id & IDR_MASK), p->bitmap); } } @@ -133,12 +210,29 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) } EXPORT_SYMBOL(idr_pre_get); -static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) +/** + * sub_alloc - try to allocate an id without growing the tree depth + * @idp: idr handle + * @starting_id: id to start search at + * @id: pointer to the allocated handle + * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer + * @gfp_mask: allocation mask for idr_layer_alloc() + * @layer_idr: optional idr passed to idr_layer_alloc() + * + * Allocate an id in range [@starting_id, INT_MAX] from @idp without + * growing its depth. Returns + * + * the allocated id >= 0 if successful, + * -EAGAIN if the tree needs to grow for allocation to succeed, + * -ENOSPC if the id space is exhausted, + * -ENOMEM if more idr_layers need to be allocated. + */ +static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, + gfp_t gfp_mask, struct idr *layer_idr) { int n, m, sh; struct idr_layer *p, *new; int l, id, oid; - unsigned long bm; id = *starting_id; restart: @@ -150,8 +244,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) * We run around this while until we reach the leaf node... */ n = (id >> (IDR_BITS*l)) & IDR_MASK; - bm = ~p->bitmap; - m = find_next_bit(&bm, IDR_SIZE, n); + m = find_next_zero_bit(p->bitmap, IDR_SIZE, n); if (m == IDR_SIZE) { /* no space available go back to previous layer. */ l++; @@ -161,7 +254,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) /* if already at the top layer, we need to grow */ if (id >= 1 << (idp->layers * IDR_BITS)) { *starting_id = id; - return IDR_NEED_TO_GROW; + return -EAGAIN; } p = pa[l]; BUG_ON(!p); @@ -180,17 +273,18 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) id = ((id >> sh) ^ n ^ m) << sh; } if ((id >= MAX_IDR_BIT) || (id < 0)) - return IDR_NOMORE_SPACE; + return -ENOSPC; if (l == 0) break; /* * Create the layer below if it is missing. */ if (!p->ary[m]) { - new = get_from_free_list(idp); + new = idr_layer_alloc(gfp_mask, layer_idr); if (!new) - return -1; + return -ENOMEM; new->layer = l-1; + new->prefix = id & idr_layer_prefix_mask(new->layer); rcu_assign_pointer(p->ary[m], new); p->count++; } @@ -203,7 +297,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) } static int idr_get_empty_slot(struct idr *idp, int starting_id, - struct idr_layer **pa) + struct idr_layer **pa, gfp_t gfp_mask, + struct idr *layer_idr) { struct idr_layer *p, *new; int layers, v, id; @@ -214,8 +309,8 @@ build_up: p = idp->top; layers = idp->layers; if (unlikely(!p)) { - if (!(p = get_from_free_list(idp))) - return -1; + if (!(p = idr_layer_alloc(gfp_mask, layer_idr))) + return -ENOMEM; p->layer = 0; layers = 1; } @@ -223,7 +318,7 @@ build_up: * Add a new layer to the top of the tree if the requested * id is larger than the currently allocated space. */ - while ((layers < (MAX_IDR_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) { + while (id > idr_max(layers)) { layers++; if (!p->count) { /* special case: if the tree is currently empty, @@ -231,9 +326,10 @@ build_up: * upwards. */ p->layer++; + WARN_ON_ONCE(p->prefix); continue; } - if (!(new = get_from_free_list(idp))) { + if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { /* * The allocation failed. If we built part of * the structure tear it down. @@ -242,45 +338,42 @@ build_up: for (new = p; p && p != idp->top; new = p) { p = p->ary[0]; new->ary[0] = NULL; - new->bitmap = new->count = 0; + new->count = 0; + bitmap_clear(new->bitmap, 0, IDR_SIZE); __move_to_free_list(idp, new); } spin_unlock_irqrestore(&idp->lock, flags); - return -1; + return -ENOMEM; } new->ary[0] = p; new->count = 1; new->layer = layers-1; - if (p->bitmap == IDR_FULL) - __set_bit(0, &new->bitmap); + new->prefix = id & idr_layer_prefix_mask(new->layer); + if (bitmap_full(p->bitmap, IDR_SIZE)) + __set_bit(0, new->bitmap); p = new; } rcu_assign_pointer(idp->top, p); idp->layers = layers; - v = sub_alloc(idp, &id, pa); - if (v == IDR_NEED_TO_GROW) + v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr); + if (v == -EAGAIN) goto build_up; return(v); } -static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) +/* + * @id and @pa are from a successful allocation from idr_get_empty_slot(). + * Install the user pointer @ptr and mark the slot full. + */ +static void idr_fill_slot(struct idr *idr, void *ptr, int id, + struct idr_layer **pa) { - struct idr_layer *pa[MAX_IDR_LEVEL]; - int id; - - id = idr_get_empty_slot(idp, starting_id, pa); - if (id >= 0) { - /* - * Successfully found an empty slot. Install the user - * pointer and mark the slot full. - */ - rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], - (struct idr_layer *)ptr); - pa[0]->count++; - idr_mark_full(pa, id); - } + /* update hint used for lookup, cleared from free_layer() */ + rcu_assign_pointer(idr->hint, pa[0]); - return id; + rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr); + pa[0]->count++; + idr_mark_full(pa, id); } /** @@ -303,49 +396,124 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; int rv; - rv = idr_get_new_above_int(idp, ptr, starting_id); - /* - * This is a cheap hack until the IDR code can be fixed to - * return proper error values. - */ + rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); if (rv < 0) - return _idr_rc_to_errno(rv); + return rv == -ENOMEM ? -EAGAIN : rv; + + idr_fill_slot(idp, ptr, rv, pa); *id = rv; return 0; } EXPORT_SYMBOL(idr_get_new_above); /** - * idr_get_new - allocate new idr entry - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: pointer to the allocated handle + * idr_preload - preload for idr_alloc() + * @gfp_mask: allocation mask to use for preloading * - * If allocation from IDR's private freelist fails, idr_get_new_above() will - * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill - * IDR's preallocation and then retry the idr_get_new_above() call. + * Preload per-cpu layer buffer for idr_alloc(). Can only be used from + * process context and each idr_preload() invocation should be matched with + * idr_preload_end(). Note that preemption is disabled while preloaded. * - * If the idr is full idr_get_new_above() will return %-ENOSPC. + * The first idr_alloc() in the preloaded section can be treated as if it + * were invoked with @gfp_mask used for preloading. This allows using more + * permissive allocation masks for idrs protected by spinlocks. + * + * For example, if idr_alloc() below fails, the failure can be treated as + * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT. + * + * idr_preload(GFP_KERNEL); + * spin_lock(lock); + * + * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); * - * @id returns a value in the range %0 ... %0x7fffffff + * spin_unlock(lock); + * idr_preload_end(); + * if (id < 0) + * error; */ -int idr_get_new(struct idr *idp, void *ptr, int *id) +void idr_preload(gfp_t gfp_mask) { - int rv; + /* + * Consuming preload buffer from non-process context breaks preload + * allocation guarantee. Disallow usage from those contexts. + */ + WARN_ON_ONCE(in_interrupt()); + might_sleep_if(gfp_mask & __GFP_WAIT); + + preempt_disable(); - rv = idr_get_new_above_int(idp, ptr, 0); /* - * This is a cheap hack until the IDR code can be fixed to - * return proper error values. + * idr_alloc() is likely to succeed w/o full idr_layer buffer and + * return value from idr_alloc() needs to be checked for failure + * anyway. Silently give up if allocation fails. The caller can + * treat failures from idr_alloc() as if idr_alloc() were called + * with @gfp_mask which should be enough. */ - if (rv < 0) - return _idr_rc_to_errno(rv); - *id = rv; - return 0; + while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { + struct idr_layer *new; + + preempt_enable(); + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + preempt_disable(); + if (!new) + break; + + /* link the new one to per-cpu preload list */ + new->ary[0] = __this_cpu_read(idr_preload_head); + __this_cpu_write(idr_preload_head, new); + __this_cpu_inc(idr_preload_cnt); + } } -EXPORT_SYMBOL(idr_get_new); +EXPORT_SYMBOL(idr_preload); + +/** + * idr_alloc - allocate new idr entry + * @idr: the (initialized) idr + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive, <= 0 for max) + * @gfp_mask: memory allocation flags + * + * Allocate an id in [start, end) and associate it with @ptr. If no ID is + * available in the specified range, returns -ENOSPC. On memory allocation + * failure, returns -ENOMEM. + * + * Note that @end is treated as max when <= 0. This is to always allow + * using @start + N as @end as long as N is inside integer range. + * + * The user is responsible for exclusively synchronizing all operations + * which may modify @idr. However, read-only accesses such as idr_find() + * or iteration can be performed under RCU read lock provided the user + * destroys @ptr in RCU-safe way after removal from idr. + */ +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) +{ + int max = end > 0 ? end - 1 : INT_MAX; /* inclusive upper limit */ + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; + int id; + + might_sleep_if(gfp_mask & __GFP_WAIT); + + /* sanity checks */ + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + if (unlikely(max < start)) + return -ENOSPC; + + /* allocate id */ + id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL); + if (unlikely(id < 0)) + return id; + if (unlikely(id > max)) + return -ENOSPC; + + idr_fill_slot(idr, ptr, id, pa); + return id; +} +EXPORT_SYMBOL_GPL(idr_alloc); static void idr_remove_warning(int id) { @@ -357,7 +525,7 @@ static void idr_remove_warning(int id) static void sub_remove(struct idr *idp, int shift, int id) { struct idr_layer *p = idp->top; - struct idr_layer **pa[MAX_IDR_LEVEL]; + struct idr_layer **pa[MAX_IDR_LEVEL + 1]; struct idr_layer ***paa = &pa[0]; struct idr_layer *to_free; int n; @@ -367,26 +535,26 @@ static void sub_remove(struct idr *idp, int shift, int id) while ((shift > 0) && p) { n = (id >> shift) & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); *++paa = &p->ary[n]; p = p->ary[n]; shift -= IDR_BITS; } n = id & IDR_MASK; - if (likely(p != NULL && test_bit(n, &p->bitmap))){ - __clear_bit(n, &p->bitmap); + if (likely(p != NULL && test_bit(n, p->bitmap))) { + __clear_bit(n, p->bitmap); rcu_assign_pointer(p->ary[n], NULL); to_free = NULL; while(*paa && ! --((**paa)->count)){ if (to_free) - free_layer(to_free); + free_layer(idp, to_free); to_free = **paa; **paa-- = NULL; } if (!*paa) idp->layers = 0; if (to_free) - free_layer(to_free); + free_layer(idp, to_free); } else idr_remove_warning(id); } @@ -401,8 +569,9 @@ void idr_remove(struct idr *idp, int id) struct idr_layer *p; struct idr_layer *to_free; - /* Mask off upper bits we don't use for the search. */ - id &= MAX_IDR_MASK; + /* see comment in idr_find_slowpath() */ + if (WARN_ON_ONCE(id < 0)) + return; sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && @@ -417,8 +586,9 @@ void idr_remove(struct idr *idp, int id) p = idp->top->ary[0]; rcu_assign_pointer(idp->top, p); --idp->layers; - to_free->bitmap = to_free->count = 0; - free_layer(to_free); + to_free->count = 0; + bitmap_clear(to_free->bitmap, 0, IDR_SIZE); + free_layer(idp, to_free); } while (idp->id_free_cnt >= MAX_IDR_FREE) { p = get_from_free_list(idp); @@ -433,34 +603,21 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); -/** - * idr_remove_all - remove all ids from the given idr tree - * @idp: idr handle - * - * idr_destroy() only frees up unused, cached idp_layers, but this - * function will remove all id mappings and leave all idp_layers - * unused. - * - * A typical clean-up sequence for objects stored in an idr tree will - * use idr_for_each() to free all objects, if necessay, then - * idr_remove_all() to remove all ids, and idr_destroy() to free - * up the cached idr_layers. - */ -void idr_remove_all(struct idr *idp) +void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; struct idr_layer *p; - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; p = idp->top; rcu_assign_pointer(idp->top, NULL); - max = 1 << n; + max = idr_max(idp->layers); id = 0; - while (id < max) { + while (id >= 0 && id <= max) { while (n > IDR_BITS && p) { n -= IDR_BITS; *paa++ = p; @@ -472,21 +629,32 @@ void idr_remove_all(struct idr *idp) /* Get the highest bit that the above add changed from 0->1. */ while (n < fls(id ^ bt_mask)) { if (p) - free_layer(p); + free_layer(idp, p); n += IDR_BITS; p = *--paa; } } idp->layers = 0; } -EXPORT_SYMBOL(idr_remove_all); +EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree * @idp: idr handle + * + * Free all id mappings and all idp_layers. After this function, @idp is + * completely unused and can be freed / recycled. The caller is + * responsible for ensuring that no one else accesses @idp during or after + * idr_destroy(). + * + * A typical clean-up sequence for objects stored in an idr tree will use + * idr_for_each() to free all objects, if necessay, then idr_destroy() to + * free up the id mappings and cached idr_layers. */ void idr_destroy(struct idr *idp) { + __idr_remove_all(idp); + while (idp->id_free_cnt) { struct idr_layer *p = get_from_free_list(idp); kmem_cache_free(idr_layer_cache, p); @@ -494,32 +662,28 @@ void idr_destroy(struct idr *idp) } EXPORT_SYMBOL(idr_destroy); -/** - * idr_find - return pointer for given id - * @idp: idr handle - * @id: lookup key - * - * Return the pointer given the id it has been registered with. A %NULL - * return indicates that @id is not valid or you passed %NULL in - * idr_get_new(). - * - * This function can be called under rcu_read_lock(), given that the leaf - * pointers lifetimes are correctly managed. - */ -void *idr_find(struct idr *idp, int id) +void *idr_find_slowpath(struct idr *idp, int id) { int n; struct idr_layer *p; + /* + * If @id is negative, idr_find() used to ignore the sign bit and + * performed lookup with the rest of bits, which is weird and can + * lead to very obscure bugs. We're now returning NULL for all + * negative IDs but just in case somebody was depending on the sign + * bit being ignored, let's trigger WARN_ON_ONCE() so that they can + * be detected and fixed. WARN_ON_ONCE() can later be removed. + */ + if (WARN_ON_ONCE(id < 0)) + return NULL; + p = rcu_dereference_raw(idp->top); if (!p) return NULL; n = (p->layer+1) * IDR_BITS; - /* Mask off upper bits we don't use for the search. */ - id &= MAX_IDR_MASK; - - if (id >= (1 << n)) + if (id > idr_max(p->layer + 1)) return NULL; BUG_ON(n == 0); @@ -530,7 +694,7 @@ void *idr_find(struct idr *idp, int id) } return((void *)p); } -EXPORT_SYMBOL(idr_find); +EXPORT_SYMBOL(idr_find_slowpath); /** * idr_for_each - iterate through all stored pointers @@ -555,15 +719,15 @@ int idr_for_each(struct idr *idp, { int n, id, max, error = 0; struct idr_layer *p; - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; p = rcu_dereference_raw(idp->top); - max = 1 << n; + max = idr_max(idp->layers); id = 0; - while (id < max) { + while (id >= 0 && id <= max) { while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; @@ -601,7 +765,7 @@ EXPORT_SYMBOL(idr_for_each); */ void *idr_get_next(struct idr *idp, int *nextidp) { - struct idr_layer *p, *pa[MAX_IDR_LEVEL]; + struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1]; struct idr_layer **paa = &pa[0]; int id = *nextidp; int n, max; @@ -611,9 +775,9 @@ void *idr_get_next(struct idr *idp, int *nextidp) if (!p) return NULL; n = (p->layer + 1) * IDR_BITS; - max = 1 << n; + max = idr_max(p->layer + 1); - while (id < max) { + while (id >= 0 && id <= max) { while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; @@ -625,7 +789,14 @@ void *idr_get_next(struct idr *idp, int *nextidp) return p; } - id += 1 << n; + /* + * Proceed to the next layer at the current level. Unlike + * idr_for_each(), @id isn't guaranteed to be aligned to + * layer boundary at this point and adding 1 << n may + * incorrectly skip IDs. Make sure we jump to the + * beginning of the next layer using round_up(). + */ + id = round_up(id + 1, 1 << n); while (n < fls(id)) { n += IDR_BITS; p = *--paa; @@ -653,14 +824,16 @@ void *idr_replace(struct idr *idp, void *ptr, int id) int n; struct idr_layer *p, *old_p; + /* see comment in idr_find_slowpath() */ + if (WARN_ON_ONCE(id < 0)) + return ERR_PTR(-EINVAL); + p = idp->top; if (!p) return ERR_PTR(-EINVAL); n = (p->layer+1) * IDR_BITS; - id &= MAX_IDR_MASK; - if (id >= (1 << n)) return ERR_PTR(-EINVAL); @@ -671,7 +844,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } n = id & IDR_MASK; - if (unlikely(p == NULL || !test_bit(n, &p->bitmap))) + if (unlikely(p == NULL || !test_bit(n, p->bitmap))) return ERR_PTR(-ENOENT); old_p = p->ary[n]; @@ -780,7 +953,7 @@ EXPORT_SYMBOL(ida_pre_get); */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { - struct idr_layer *pa[MAX_IDR_LEVEL]; + struct idr_layer *pa[MAX_IDR_LEVEL + 1]; struct ida_bitmap *bitmap; unsigned long flags; int idr_id = starting_id / IDA_BITMAP_BITS; @@ -789,9 +962,9 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) restart: /* get vacant slot */ - t = idr_get_empty_slot(&ida->idr, idr_id, pa); + t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr); if (t < 0) - return _idr_rc_to_errno(t); + return t == -ENOMEM ? -EAGAIN : t; if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) return -ENOSPC; @@ -852,25 +1025,6 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) EXPORT_SYMBOL(ida_get_new_above); /** - * ida_get_new - allocate new ID - * @ida: idr handle - * @p_id: pointer to the allocated handle - * - * Allocate new ID. It should be called with any required locks. - * - * If memory is required, it will return %-EAGAIN, you should unlock - * and go back to the idr_pre_get() call. If the idr is full, it will - * return %-ENOSPC. - * - * @p_id returns a value in the range %0 ... %0x7fffffff. - */ -int ida_get_new(struct ida *ida, int *p_id) -{ - return ida_get_new_above(ida, 0, p_id); -} -EXPORT_SYMBOL(ida_get_new); - -/** * ida_remove - remove the given ID * @ida: ida handle * @id: ID to free @@ -887,7 +1041,7 @@ void ida_remove(struct ida *ida, int id) /* clear full bits while looking up the leaf idr_layer */ while ((shift > 0) && p) { n = (idr_id >> shift) & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); p = p->ary[n]; shift -= IDR_BITS; } @@ -896,7 +1050,7 @@ void ida_remove(struct ida *ida, int id) goto err; n = idr_id & IDR_MASK; - __clear_bit(n, &p->bitmap); + __clear_bit(n, p->bitmap); bitmap = (void *)p->ary[n]; if (!test_bit(offset, bitmap->bitmap)) @@ -905,7 +1059,7 @@ void ida_remove(struct ida *ida, int id) /* update bitmap and remove it if empty */ __clear_bit(offset, bitmap->bitmap); if (--bitmap->nr_busy == 0) { - __set_bit(n, &p->bitmap); /* to please idr_remove() */ + __set_bit(n, p->bitmap); /* to please idr_remove() */ idr_remove(&ida->idr, idr_id); free_bitmap(ida, bitmap); } diff --git a/kernel/kfifo.c b/lib/kfifo.c index 59dcf5b81d24..7b7f83027b7b 100644 --- a/kernel/kfifo.c +++ b/lib/kfifo.c @@ -42,8 +42,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, * round down to the next power of 2, since our 'let the indices * wrap' technique works only in this case. */ - if (!is_power_of_2(size)) - size = rounddown_pow_of_two(size); + size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; @@ -83,8 +82,7 @@ int __kfifo_init(struct __kfifo *fifo, void *buffer, { size /= esize; - if (!is_power_of_2(size)) - size = rounddown_pow_of_two(size); + size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; diff --git a/lib/lru_cache.c b/lib/lru_cache.c index d71d89498943..8335d39d2ccd 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -262,12 +262,11 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, bool include_changing) { - struct hlist_node *n; struct lc_element *e; BUG_ON(!lc); BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) { /* "about to be changed" elements, pending transaction commit, * are hashed by their "new number". "Normal" elements have * lc_number == lc_new_number. */ diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7874b01e816e..b83c144d731f 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -394,6 +394,44 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +void __sg_page_iter_start(struct sg_page_iter *piter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgoffset) +{ + piter->__pg_advance = 0; + piter->__nents = nents; + + piter->page = NULL; + piter->sg = sglist; + piter->sg_pgoffset = pgoffset; +} +EXPORT_SYMBOL(__sg_page_iter_start); + +static int sg_page_count(struct scatterlist *sg) +{ + return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT; +} + +bool __sg_page_iter_next(struct sg_page_iter *piter) +{ + if (!piter->__nents || !piter->sg) + return false; + + piter->sg_pgoffset += piter->__pg_advance; + piter->__pg_advance = 1; + + while (piter->sg_pgoffset >= sg_page_count(piter->sg)) { + piter->sg_pgoffset -= sg_page_count(piter->sg); + piter->sg = sg_next(piter->sg); + if (!--piter->__nents || !piter->sg) + return false; + } + piter->page = nth_page(sg_page(piter->sg), piter->sg_pgoffset); + + return true; +} +EXPORT_SYMBOL(__sg_page_iter_next); + /** * sg_miter_start - start mapping iteration over a sg list * @miter: sg mapping iter to be started @@ -411,9 +449,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, { memset(miter, 0, sizeof(struct sg_mapping_iter)); - miter->__sg = sgl; - miter->__nents = nents; - miter->__offset = 0; + __sg_page_iter_start(&miter->piter, sgl, nents, 0); WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); miter->__flags = flags; } @@ -438,36 +474,35 @@ EXPORT_SYMBOL(sg_miter_start); */ bool sg_miter_next(struct sg_mapping_iter *miter) { - unsigned int off, len; - - /* check for end and drop resources from the last iteration */ - if (!miter->__nents) - return false; - sg_miter_stop(miter); - /* get to the next sg if necessary. __offset is adjusted by stop */ - while (miter->__offset == miter->__sg->length) { - if (--miter->__nents) { - miter->__sg = sg_next(miter->__sg); - miter->__offset = 0; - } else + /* + * Get to the next page if necessary. + * __remaining, __offset is adjusted by sg_miter_stop + */ + if (!miter->__remaining) { + struct scatterlist *sg; + unsigned long pgoffset; + + if (!__sg_page_iter_next(&miter->piter)) return false; - } - /* map the next page */ - off = miter->__sg->offset + miter->__offset; - len = miter->__sg->length - miter->__offset; + sg = miter->piter.sg; + pgoffset = miter->piter.sg_pgoffset; - miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT); - off &= ~PAGE_MASK; - miter->length = min_t(unsigned int, len, PAGE_SIZE - off); - miter->consumed = miter->length; + miter->__offset = pgoffset ? 0 : sg->offset; + miter->__remaining = sg->offset + sg->length - + (pgoffset << PAGE_SHIFT) - miter->__offset; + miter->__remaining = min_t(unsigned long, miter->__remaining, + PAGE_SIZE - miter->__offset); + } + miter->page = miter->piter.page; + miter->consumed = miter->length = miter->__remaining; if (miter->__flags & SG_MITER_ATOMIC) - miter->addr = kmap_atomic(miter->page) + off; + miter->addr = kmap_atomic(miter->page) + miter->__offset; else - miter->addr = kmap(miter->page) + off; + miter->addr = kmap(miter->page) + miter->__offset; return true; } @@ -494,6 +529,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) /* drop resources from the last iteration */ if (miter->addr) { miter->__offset += miter->consumed; + miter->__remaining -= miter->consumed; if (miter->__flags & SG_MITER_TO_SG) flush_kernel_dcache_page(miter->page); diff --git a/mm/Kconfig b/mm/Kconfig index 2c7aea7106f9..ae55c1e04d10 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -287,7 +287,7 @@ config NR_QUICK config VIRT_TO_BUS def_bool y - depends on !ARCH_NO_VIRT_TO_BUS + depends on HAVE_VIRT_TO_BUS config MMU_NOTIFIER bool diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bfa142e67b1c..e2f7f5aaaafb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1906,9 +1906,8 @@ static inline void free_mm_slot(struct mm_slot *mm_slot) static struct mm_slot *get_mm_slot(struct mm_struct *mm) { struct mm_slot *mm_slot; - struct hlist_node *node; - hash_for_each_possible(mm_slots_hash, mm_slot, node, hash, (unsigned long)mm) + hash_for_each_possible(mm_slots_hash, mm_slot, hash, (unsigned long)mm) if (mm == mm_slot->mm) return mm_slot; diff --git a/mm/internal.h b/mm/internal.h index 1c0c4cc0fcf7..8562de0a5197 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -195,7 +195,7 @@ static inline int mlocked_vma_newpage(struct vm_area_struct *vma, * must be called with vma's mmap_sem held for read or write, and page locked. */ extern void mlock_vma_page(struct page *page); -extern void munlock_vma_page(struct page *page); +extern unsigned int munlock_vma_page(struct page *page); /* * Clear the page's PageMlocked(). This can be useful in a situation where diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 83dd5fbf5e60..c8d7f3110fd0 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -436,7 +436,7 @@ static int get_object(struct kmemleak_object *object) */ static void free_object_rcu(struct rcu_head *rcu) { - struct hlist_node *elem, *tmp; + struct hlist_node *tmp; struct kmemleak_scan_area *area; struct kmemleak_object *object = container_of(rcu, struct kmemleak_object, rcu); @@ -445,8 +445,8 @@ static void free_object_rcu(struct rcu_head *rcu) * Once use_count is 0 (guaranteed by put_object), there is no other * code accessing this object, hence no need for locking. */ - hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) { - hlist_del(elem); + hlist_for_each_entry_safe(area, tmp, &object->area_list, node) { + hlist_del(&area->node); kmem_cache_free(scan_area_cache, area); } kmem_cache_free(object_cache, object); @@ -1177,7 +1177,6 @@ static void scan_block(void *_start, void *_end, static void scan_object(struct kmemleak_object *object) { struct kmemleak_scan_area *area; - struct hlist_node *elem; unsigned long flags; /* @@ -1205,7 +1204,7 @@ static void scan_object(struct kmemleak_object *object) spin_lock_irqsave(&object->lock, flags); } } else - hlist_for_each_entry(area, elem, &object->area_list, node) + hlist_for_each_entry(area, &object->area_list, node) scan_block((void *)area->start, (void *)(area->start + area->size), object, 0); @@ -320,10 +320,9 @@ static inline void free_mm_slot(struct mm_slot *mm_slot) static struct mm_slot *get_mm_slot(struct mm_struct *mm) { - struct hlist_node *node; struct mm_slot *slot; - hash_for_each_possible(mm_slots_hash, slot, node, link, (unsigned long)mm) + hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm) if (slot->mm == mm) return slot; @@ -496,9 +495,8 @@ static inline int get_kpfn_nid(unsigned long kpfn) static void remove_node_from_stable_tree(struct stable_node *stable_node) { struct rmap_item *rmap_item; - struct hlist_node *hlist; - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) ksm_pages_sharing--; else @@ -1898,7 +1896,6 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, { struct stable_node *stable_node; struct rmap_item *rmap_item; - struct hlist_node *hlist; unsigned int mapcount = page_mapcount(page); int referenced = 0; int search_new_forks = 0; @@ -1910,7 +1907,7 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, if (!stable_node) return 0; again: - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; @@ -1952,7 +1949,6 @@ out: int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) { struct stable_node *stable_node; - struct hlist_node *hlist; struct rmap_item *rmap_item; int ret = SWAP_AGAIN; int search_new_forks = 0; @@ -1964,7 +1960,7 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) if (!stable_node) return SWAP_FAIL; again: - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; @@ -2005,7 +2001,6 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, struct vm_area_struct *, unsigned long, void *), void *arg) { struct stable_node *stable_node; - struct hlist_node *hlist; struct rmap_item *rmap_item; int ret = SWAP_AGAIN; int search_new_forks = 0; @@ -2017,7 +2012,7 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, if (!stable_node) return ret; again: - hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { + hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; diff --git a/mm/mlock.c b/mm/mlock.c index e6638f565d42..1c5e33fce639 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -102,13 +102,16 @@ void mlock_vma_page(struct page *page) * can't isolate the page, we leave it for putback_lru_page() and vmscan * [page_referenced()/try_to_unmap()] to deal with. */ -void munlock_vma_page(struct page *page) +unsigned int munlock_vma_page(struct page *page) { + unsigned int page_mask = 0; + BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { - mod_zone_page_state(page_zone(page), NR_MLOCK, - -hpage_nr_pages(page)); + unsigned int nr_pages = hpage_nr_pages(page); + mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); + page_mask = nr_pages - 1; if (!isolate_lru_page(page)) { int ret = SWAP_AGAIN; @@ -141,6 +144,8 @@ void munlock_vma_page(struct page *page) count_vm_event(UNEVICTABLE_PGMUNLOCKED); } } + + return page_mask; } /** @@ -159,7 +164,6 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *nonblocking) { struct mm_struct *mm = vma->vm_mm; - unsigned long addr = start; unsigned long nr_pages = (end - start) / PAGE_SIZE; int gup_flags; @@ -189,7 +193,7 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma, * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ - return __get_user_pages(current, mm, addr, nr_pages, gup_flags, + return __get_user_pages(current, mm, start, nr_pages, gup_flags, NULL, NULL, nonblocking); } @@ -226,13 +230,12 @@ static int __mlock_posix_error_return(long retval) void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long addr; - - lru_add_drain(); vma->vm_flags &= ~VM_LOCKED; - for (addr = start; addr < end; addr += PAGE_SIZE) { + while (start < end) { struct page *page; + unsigned int page_mask, page_increm; + /* * Although FOLL_DUMP is intended for get_dump_page(), * it just so happens that its special treatment of the @@ -240,13 +243,22 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * suits munlock very well (and if somehow an abnormal page * has sneaked into the range, we won't oops here: great). */ - page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); + page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, + &page_mask); if (page && !IS_ERR(page)) { lock_page(page); - munlock_vma_page(page); + lru_add_drain(); + /* + * Any THP page found by follow_page_mask() may have + * gotten split before reaching munlock_vma_page(), + * so we need to recompute the page_mask here. + */ + page_mask = munlock_vma_page(page); unlock_page(page); put_page(page); } + page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + start += page_increm * PAGE_SIZE; cond_resched(); } } diff --git a/mm/mmap.c b/mm/mmap.c index 37a1fcac029d..2664a47cec93 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2185,9 +2185,28 @@ int expand_downwards(struct vm_area_struct *vma, return error; } +/* + * Note how expand_stack() refuses to expand the stack all the way to + * abut the next virtual mapping, *unless* that mapping itself is also + * a stack mapping. We want to leave room for a guard page, after all + * (the guard page itself is not added here, that is done by the + * actual page faulting logic) + * + * This matches the behavior of the guard page logic (see mm/memory.c: + * check_stack_guard_page()), which only allows the guard page to be + * removed under these circumstances. + */ #ifdef CONFIG_STACK_GROWSUP int expand_stack(struct vm_area_struct *vma, unsigned long address) { + struct vm_area_struct *next; + + address &= PAGE_MASK; + next = vma->vm_next; + if (next && next->vm_start == address + PAGE_SIZE) { + if (!(next->vm_flags & VM_GROWSUP)) + return -ENOMEM; + } return expand_upwards(vma, address); } @@ -2209,6 +2228,14 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) #else int expand_stack(struct vm_area_struct *vma, unsigned long address) { + struct vm_area_struct *prev; + + address &= PAGE_MASK; + prev = vma->vm_prev; + if (prev && prev->vm_end == address) { + if (!(prev->vm_flags & VM_GROWSDOWN)) + return -ENOMEM; + } return expand_downwards(vma, address); } diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 2175fb0d501c..be04122fb277 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -95,11 +95,10 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *mn; - struct hlist_node *n; int young = 0, id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->clear_flush_young) young |= mn->ops->clear_flush_young(mn, mm, address); } @@ -112,11 +111,10 @@ int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *mn; - struct hlist_node *n; int young = 0, id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->test_young) { young = mn->ops->test_young(mn, mm, address); if (young) @@ -132,11 +130,10 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->change_pte) mn->ops->change_pte(mn, mm, address, pte); } @@ -147,11 +144,10 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->invalidate_page) mn->ops->invalidate_page(mn, mm, address); } @@ -162,11 +158,10 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->invalidate_range_start) mn->ops->invalidate_range_start(mn, mm, start, end); } @@ -178,11 +173,10 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *mn; - struct hlist_node *n; int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->invalidate_range_end) mn->ops->invalidate_range_end(mn, mm, start, end); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cdc377c456c0..efe68148f621 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -696,7 +696,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, * => fast response on large errors; small oscillation near setpoint */ setpoint = (freerun + limit) / 2; - x = div_s64((setpoint - dirty) << RATELIMIT_CALC_SHIFT, + x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, limit - setpoint + 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; @@ -1986,6 +1986,8 @@ int __set_page_dirty_no_writeback(struct page *page) */ void account_page_dirtied(struct page *page, struct address_space *mapping) { + trace_writeback_dirty_page(page, mapping); + if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_DIRTIED); diff --git a/net/9p/error.c b/net/9p/error.c index 2ab2de76010f..126fd0dceea2 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init); int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index de2e950a0a7a..74dea377fe5b 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -655,7 +655,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, - .zc_request = p9_virtio_zc_request, + //.zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, /* * We leave one entry for input and one entry for response diff --git a/net/9p/util.c b/net/9p/util.c index 6ceeeb384de7..59f278e64f58 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -87,23 +87,18 @@ EXPORT_SYMBOL(p9_idpool_destroy); int p9_idpool_get(struct p9_idpool *p) { - int i = 0; - int error; + int i; unsigned long flags; -retry: - if (idr_pre_get(&p->pool, GFP_NOFS) == 0) - return -1; - + idr_preload(GFP_NOFS); spin_lock_irqsave(&p->lock, flags); /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - spin_unlock_irqrestore(&p->lock, flags); + i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - if (error == -EAGAIN) - goto retry; - else if (error) + spin_unlock_irqrestore(&p->lock, flags); + idr_preload_end(); + if (i < 0) return -1; p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 33475291c9c1..4a141e3cf076 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -93,10 +93,9 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) { struct sock *s; - struct hlist_node *node; read_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (to->sat_port != at->src_port) @@ -141,11 +140,10 @@ static struct sock *atalk_find_or_insert_socket(struct sock *sk, struct sockaddr_at *sat) { struct sock *s; - struct hlist_node *node; struct atalk_sock *at; write_lock_bh(&atalk_sockets_lock); - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && @@ -1084,9 +1082,8 @@ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat) sat->sat_port < ATPORT_LAST; sat->sat_port++) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &atalk_sockets) { + sk_for_each(s, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && diff --git a/net/atm/common.c b/net/atm/common.c index 806fc0a40051..7b491006eaf4 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -270,11 +270,11 @@ void atm_dev_release_vccs(struct atm_dev *dev) write_lock_irq(&vcc_sklist_lock); for (i = 0; i < VCC_HTABLE_SIZE; i++) { struct hlist_head *head = &vcc_hash[i]; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct sock *s; struct atm_vcc *vcc; - sk_for_each_safe(s, node, tmp, head) { + sk_for_each_safe(s, tmp, head) { vcc = atm_sk(s); if (vcc->dev == dev) { vcc_release_async(vcc, -EPIPE); @@ -317,11 +317,10 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) static int check_ci(const struct atm_vcc *vcc, short vpi, int vci) { struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)]; - struct hlist_node *node; struct sock *s; struct atm_vcc *walk; - sk_for_each(s, node, head) { + sk_for_each(s, head) { walk = atm_sk(s); if (walk->dev != vcc->dev) continue; diff --git a/net/atm/lec.c b/net/atm/lec.c index 2e3d942e77f1..f23916be18fb 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -842,7 +842,9 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl, --*l; } - hlist_for_each_entry_from(tmp, e, next) { + tmp = container_of(e, struct lec_arp_table, next); + + hlist_for_each_entry_from(tmp, next) { if (--*l < 0) break; } @@ -1307,7 +1309,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry) static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { - struct hlist_node *node; struct lec_arp_table *entry; int i, remove_vcc = 1; @@ -1326,7 +1327,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp(to_remove->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { @@ -1364,14 +1365,13 @@ static const char *get_status_string(unsigned char st) static void dump_arp_table(struct lec_priv *priv) { - struct hlist_node *node; struct lec_arp_table *rulla; char buf[256]; int i, j, offset; pr_info("Dump %p:\n", priv); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(rulla, node, + hlist_for_each_entry(rulla, &priv->lec_arp_tables[i], next) { offset = 0; offset += sprintf(buf, "%d: %p\n", i, rulla); @@ -1403,7 +1403,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_no_forward)) pr_info("No forward\n"); - hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) { + hlist_for_each_entry(rulla, &priv->lec_no_forward, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1428,7 +1428,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->lec_arp_empty_ones)) pr_info("Empty ones\n"); - hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) { + hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1453,7 +1453,7 @@ static void dump_arp_table(struct lec_priv *priv) if (!hlist_empty(&priv->mcast_fwds)) pr_info("Multicast Forward VCCs\n"); - hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) { + hlist_for_each_entry(rulla, &priv->mcast_fwds, next) { offset = 0; offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr); offset += sprintf(buf + offset, " Atm:"); @@ -1487,7 +1487,7 @@ static void dump_arp_table(struct lec_priv *priv) static void lec_arp_destroy(struct lec_priv *priv) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -1499,7 +1499,7 @@ static void lec_arp_destroy(struct lec_priv *priv) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { lec_arp_remove(priv, entry); lec_arp_put(entry); @@ -1507,7 +1507,7 @@ static void lec_arp_destroy(struct lec_priv *priv) INIT_HLIST_HEAD(&priv->lec_arp_tables[i]); } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1516,7 +1516,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_arp_empty_ones); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { del_timer_sync(&entry->timer); lec_arp_clear_vccs(entry); @@ -1525,7 +1525,7 @@ static void lec_arp_destroy(struct lec_priv *priv) } INIT_HLIST_HEAD(&priv->lec_no_forward); - hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ lec_arp_clear_vccs(entry); hlist_del(&entry->next); @@ -1542,14 +1542,13 @@ static void lec_arp_destroy(struct lec_priv *priv) static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, const unsigned char *mac_addr) { - struct hlist_node *node; struct hlist_head *head; struct lec_arp_table *entry; pr_debug("%pM\n", mac_addr); head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])]; - hlist_for_each_entry(entry, node, head, next) { + hlist_for_each_entry(entry, head, next) { if (ether_addr_equal(mac_addr, entry->mac_addr)) return entry; } @@ -1686,7 +1685,7 @@ static void lec_arp_check_expire(struct work_struct *work) unsigned long flags; struct lec_priv *priv = container_of(work, struct lec_priv, lec_arp_work.work); - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; unsigned long now; int i; @@ -1696,7 +1695,7 @@ static void lec_arp_check_expire(struct work_struct *work) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (__lec_arp_check_expire(entry, now, priv)) { struct sk_buff *skb; @@ -1823,14 +1822,14 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long permanent) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; pr_debug("\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && (permanent || @@ -1855,7 +1854,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, unsigned int targetless_le_arp) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; int i; @@ -1870,7 +1869,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, * we have no entry in the cache. 7.1.30 */ if (!hlist_empty(&priv->lec_arp_empty_ones)) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) { hlist_del(&entry->next); @@ -1915,7 +1914,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr, memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); del_timer(&entry->timer); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(tmp, node, + hlist_for_each_entry(tmp, &priv->lec_arp_tables[i], next) { if (entry != tmp && !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { @@ -1956,7 +1955,6 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb)) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i, found_entry = 0; @@ -2026,7 +2024,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, ioc_data->atm_addr[16], ioc_data->atm_addr[17], ioc_data->atm_addr[18], ioc_data->atm_addr[19]); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (memcmp (ioc_data->atm_addr, entry->atm_addr, @@ -2103,7 +2101,6 @@ out: static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; @@ -2111,7 +2108,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (entry->flush_tran_id == tran_id && entry->status == ESI_FLUSH_PENDING) { @@ -2140,13 +2137,12 @@ lec_set_flush_tran_id(struct lec_priv *priv, const unsigned char *atm_addr, unsigned long tran_id) { unsigned long flags; - struct hlist_node *node; struct lec_arp_table *entry; int i; spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) - hlist_for_each_entry(entry, node, + hlist_for_each_entry(entry, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; @@ -2198,7 +2194,7 @@ out: static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry; int i; @@ -2208,7 +2204,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_tables[i], next) { if (vcc == entry->vcc) { lec_arp_remove(priv, entry); @@ -2219,7 +2215,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (entry->vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2229,7 +2225,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_no_forward, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); @@ -2239,7 +2235,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) } } - hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) { + hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) { if (entry->recv_vcc == vcc) { lec_arp_clear_vccs(entry); /* No timer, LANEv2 7.1.20 and 2.3.5.3 */ @@ -2257,13 +2253,13 @@ lec_arp_check_empties(struct lec_priv *priv, struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; - struct hlist_node *node, *next; + struct hlist_node *next; struct lec_arp_table *entry, *tmp; struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data; unsigned char *src = hdr->h_source; spin_lock_irqsave(&priv->lec_arp_lock, flags); - hlist_for_each_entry_safe(entry, node, next, + hlist_for_each_entry_safe(entry, next, &priv->lec_arp_empty_ones, next) { if (vcc == entry->vcc) { del_timer(&entry->timer); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 86767ca908a3..4176887e72eb 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -217,7 +217,6 @@ static void purge_vcc(struct atm_vcc *vcc) static void sigd_close(struct atm_vcc *vcc) { - struct hlist_node *node; struct sock *s; int i; @@ -231,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc) for (i = 0; i < VCC_HTABLE_SIZE; ++i) { struct hlist_head *head = &vcc_hash[i]; - sk_for_each(s, node, head) { + sk_for_each(s, head) { vcc = atm_sk(s); purge_vcc(vcc); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 69a06c47b648..7b11f8bc5071 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -81,14 +81,13 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; - struct hlist_node *node; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; spin_lock_bh(&ax25_list_lock); again: - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; spin_unlock_bh(&ax25_list_lock); @@ -158,10 +157,9 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi, struct net_device *dev, int type) { ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if ((s->iamdigi && !digi) || (!s->iamdigi && digi)) continue; if (s->sk && !ax25cmp(&s->source_addr, addr) && @@ -187,10 +185,9 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr, { struct sock *sk = NULL; ax25_cb *s; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && !ax25cmp(&s->source_addr, my_addr) && !ax25cmp(&s->dest_addr, dest_addr) && s->sk->sk_type == type) { @@ -213,10 +210,9 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr, ax25_digi *digi, struct net_device *dev) { ax25_cb *s; - struct hlist_node *node; spin_lock_bh(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk && s->sk->sk_type != SOCK_SEQPACKET) continue; if (s->ax25_dev == NULL) @@ -248,10 +244,9 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto) { ax25_cb *s; struct sk_buff *copy; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(s, node, &ax25_list) { + ax25_for_each(s, &ax25_list) { if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 && s->sk->sk_type == SOCK_RAW && s->sk->sk_protocol == proto && diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c index 5ea7fd3e2af9..e05bd57b5afd 100644 --- a/net/ax25/ax25_ds_subr.c +++ b/net/ax25/ax25_ds_subr.c @@ -39,7 +39,6 @@ void ax25_ds_nr_error_recovery(ax25_cb *ax25) void ax25_ds_enquiry_response(ax25_cb *ax25) { ax25_cb *ax25o; - struct hlist_node *node; /* Please note that neither DK4EG's nor DG2FEF's * DAMA spec mention the following behaviour as seen @@ -80,7 +79,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25) ax25_ds_set_timer(ax25->ax25_dev); spin_lock(&ax25_list_lock); - ax25_for_each(ax25o, node, &ax25_list) { + ax25_for_each(ax25o, &ax25_list) { if (ax25o == ax25) continue; @@ -159,10 +158,9 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev) { ax25_cb *ax25; int res = 0; - struct hlist_node *node; spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) + ax25_for_each(ax25, &ax25_list) if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) { res = 1; break; diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 993c439b4f71..951cd57bb07d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -70,7 +70,6 @@ static void ax25_ds_timeout(unsigned long arg) { ax25_dev *ax25_dev = (struct ax25_dev *) arg; ax25_cb *ax25; - struct hlist_node *node; if (ax25_dev == NULL || !ax25_dev->dama.slave) return; /* Yikes! */ @@ -81,7 +80,7 @@ static void ax25_ds_timeout(unsigned long arg) } spin_lock(&ax25_list_lock); - ax25_for_each(ax25, node, &ax25_list) { + ax25_for_each(ax25, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 7d5f24b82cc8..7f16e8a931b2 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -193,10 +193,9 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev) void ax25_link_failed(ax25_cb *ax25, int reason) { struct ax25_linkfail *lf; - struct hlist_node *node; spin_lock_bh(&linkfail_lock); - hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node) + hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node) lf->func(ax25, reason); spin_unlock_bh(&linkfail_lock); } diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 957999e43ff7..71c4badbc807 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -54,10 +54,9 @@ EXPORT_SYMBOL(ax25_uid_policy); ax25_uid_assoc *ax25_findbyuid(kuid_t uid) { ax25_uid_assoc *ax25_uid, *res = NULL; - struct hlist_node *node; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (uid_eq(ax25_uid->uid, uid)) { ax25_uid_hold(ax25_uid); res = ax25_uid; @@ -74,7 +73,6 @@ EXPORT_SYMBOL(ax25_findbyuid); int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; ax25_uid_assoc *user; unsigned long res; @@ -82,7 +80,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = from_kuid_munged(current_user_ns(), ax25_uid->uid); break; @@ -126,7 +124,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) ax25_uid = NULL; write_lock(&ax25_uid_lock); - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; } @@ -212,11 +210,10 @@ const struct file_operations ax25_uid_fops = { void __exit ax25_uid_free(void) { ax25_uid_assoc *ax25_uid; - struct hlist_node *node; write_lock(&ax25_uid_lock); again: - ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + ax25_uid_for_each(ax25_uid, &ax25_uid_list) { hlist_del_init(&ax25_uid->uid_node); ax25_uid_put(ax25_uid); goto again; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 72fe1bbf7721..a0b253ecadaf 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -487,7 +487,6 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, */ struct batadv_forw_packet *forw_packet_aggr = NULL; struct batadv_forw_packet *forw_packet_pos = NULL; - struct hlist_node *tmp_node; struct batadv_ogm_packet *batadv_ogm_packet; bool direct_link; unsigned long max_aggregation_jiffies; @@ -500,7 +499,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->forw_bat_list_lock); /* own packets are not to be aggregated */ if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) { - hlist_for_each_entry(forw_packet_pos, tmp_node, + hlist_for_each_entry(forw_packet_pos, &bat_priv->forw_bat_list, list) { if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet, bat_priv, packet_len, @@ -655,7 +654,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - struct hlist_node *node; int if_num; uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; @@ -665,7 +663,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, "update_originator(): Searching and updating originator entry of received packet\n"); rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { neigh_addr = tmp_neigh_node->addr; if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && @@ -801,7 +799,6 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; - struct hlist_node *node; uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; @@ -810,7 +807,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, /* find corresponding one hop neighbor */ rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_neigh_node->neigh_list, list) { if (!batadv_compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) @@ -920,7 +917,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; struct batadv_neigh_node *tmp_neigh_node; - struct hlist_node *node; int is_duplicate = 0; int32_t seq_diff; int need_update = 0; @@ -943,7 +939,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, goto out; rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits, orig_node->last_real_seqno, diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 30f46526cbbd..6a4f728680ae 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -144,7 +144,6 @@ static struct batadv_bla_claim { struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_bla_claim *claim; struct batadv_bla_claim *claim_tmp = NULL; int index; @@ -156,7 +155,7 @@ static struct batadv_bla_claim head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (!batadv_compare_claim(&claim->hash_entry, data)) continue; @@ -185,7 +184,6 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_bla_backbone_gw search_entry, *backbone_gw; struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL; int index; @@ -200,7 +198,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_backbone_gw(&backbone_gw->hash_entry, &search_entry)) continue; @@ -221,7 +219,7 @@ static void batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_hashtable *hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_bla_claim *claim; int i; @@ -236,13 +234,13 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(claim, node, node_tmp, + hlist_for_each_entry_safe(claim, node_tmp, head, hash_entry) { if (claim->backbone_gw != backbone_gw) continue; batadv_claim_free_ref(claim); - hlist_del_rcu(node); + hlist_del_rcu(&claim->hash_entry); } spin_unlock_bh(list_lock); } @@ -460,7 +458,6 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, short vid) { - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; struct batadv_bla_claim *claim; @@ -481,7 +478,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { /* only own claims are interesting */ if (claim->backbone_gw != backbone_gw) continue; @@ -958,7 +955,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) { struct batadv_bla_backbone_gw *backbone_gw; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -973,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node, node_tmp, + hlist_for_each_entry_safe(backbone_gw, node_tmp, head, hash_entry) { if (now) goto purge_now; @@ -992,7 +989,7 @@ purge_now: batadv_bla_del_backbone_claims(backbone_gw); - hlist_del_rcu(node); + hlist_del_rcu(&backbone_gw->hash_entry); batadv_backbone_gw_free_ref(backbone_gw); } spin_unlock_bh(list_lock); @@ -1013,7 +1010,6 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, int now) { struct batadv_bla_claim *claim; - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; int i; @@ -1026,7 +1022,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { if (now) goto purge_now; if (!batadv_compare_eth(claim->backbone_gw->orig, @@ -1062,7 +1058,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, struct batadv_hard_iface *oldif) { struct batadv_bla_backbone_gw *backbone_gw; - struct hlist_node *node; struct hlist_head *head; struct batadv_hashtable *hash; __be16 group; @@ -1086,7 +1081,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { /* own orig still holds the old value. */ if (!batadv_compare_eth(backbone_gw->orig, oldif->net_dev->dev_addr)) @@ -1112,7 +1107,6 @@ static void batadv_bla_periodic_work(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_priv *bat_priv; struct batadv_priv_bla *priv_bla; - struct hlist_node *node; struct hlist_head *head; struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hashtable *hash; @@ -1140,7 +1134,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (!batadv_compare_eth(backbone_gw->orig, primary_if->net_dev->dev_addr)) continue; @@ -1322,7 +1316,6 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_bla_backbone_gw *backbone_gw; int i; @@ -1336,7 +1329,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { if (batadv_compare_eth(backbone_gw->orig, orig)) { rcu_read_unlock(); return 1; @@ -1607,7 +1600,6 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->bla.claim_hash; struct batadv_bla_claim *claim; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; bool is_own; @@ -1628,7 +1620,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(claim, node, head, hash_entry) { + hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", @@ -1652,7 +1644,6 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct batadv_bla_backbone_gw *backbone_gw; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; int secs, msecs; uint32_t i; @@ -1674,7 +1665,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) { + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); secs = msecs / 1000; diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 761a59002e34..d54188a112ea 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -83,7 +83,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, { spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_dat_entry *dat_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -95,7 +95,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, list_lock = &bat_priv->dat.hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(dat_entry, node, node_tmp, head, + hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { /* if an helper function has been passed as parameter, * ask it if the entry has to be purged or not @@ -103,7 +103,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, if (to_purge && !to_purge(dat_entry)) continue; - hlist_del_rcu(node); + hlist_del_rcu(&dat_entry->hash_entry); batadv_dat_entry_free_ref(dat_entry); } spin_unlock_bh(list_lock); @@ -235,7 +235,6 @@ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) { struct hlist_head *head; - struct hlist_node *node; struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; struct batadv_hashtable *hash = bat_priv->dat.hash; uint32_t index; @@ -247,7 +246,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { if (dat_entry->ip != ip) continue; @@ -465,7 +464,6 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, batadv_dat_addr_t max = 0, tmp_max = 0; struct batadv_orig_node *orig_node, *max_orig_node = NULL; struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; int i; @@ -481,7 +479,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* the dht space is a ring and addresses are unsigned */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -686,7 +684,6 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hashtable *hash = bat_priv->dat.hash; struct batadv_dat_entry *dat_entry; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; unsigned long last_seen_jiffies; int last_seen_msecs, last_seen_secs, last_seen_mins; @@ -704,7 +701,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(dat_entry, head, hash_entry) { last_seen_jiffies = jiffies - dat_entry->last_update; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_mins = last_seen_msecs / 60000; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 074107f2cfaa..34f99a46ec1d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -114,7 +114,6 @@ static struct batadv_gw_node * batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; uint32_t gw_divisor; @@ -127,7 +126,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) gw_divisor *= 64; rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; @@ -344,7 +343,6 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, uint8_t new_gwflags) { - struct hlist_node *node; struct batadv_gw_node *gw_node, *curr_gw; /* Note: We don't need a NULL check here, since curr_gw never gets @@ -355,7 +353,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, curr_gw = batadv_gw_get_selected_gw_node(bat_priv); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->orig_node != orig_node) continue; @@ -403,7 +401,7 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, void batadv_gw_node_purge(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node, *curr_gw; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT); int do_deselect = 0; @@ -411,7 +409,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv) spin_lock_bh(&bat_priv->gw.list_lock); - hlist_for_each_entry_safe(gw_node, node, node_tmp, + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.list, list) { if (((!gw_node->deleted) || (time_before(jiffies, gw_node->deleted + timeout))) && @@ -476,7 +474,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hard_iface *primary_if; struct batadv_gw_node *gw_node; - struct hlist_node *node; int gw_count = 0; primary_if = batadv_seq_print_text_primary_if_get(seq); @@ -490,7 +487,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) { + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { if (gw_node->deleted) continue; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 21fe6987733b..0488d70c8c35 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -345,9 +345,8 @@ void batadv_recv_handler_unregister(uint8_t packet_type) static struct batadv_algo_ops *batadv_algo_get(char *name) { struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; - struct hlist_node *node; - hlist_for_each_entry(bat_algo_ops_tmp, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { if (strcmp(bat_algo_ops_tmp->name, name) != 0) continue; @@ -411,11 +410,10 @@ out: int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; - struct hlist_node *node; seq_printf(seq, "Available routing algorithms:\n"); - hlist_for_each_entry(bat_algo_ops, node, &batadv_algo_list, list) { + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { seq_printf(seq, "%s\n", bat_algo_ops->name); } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 457ea445217c..96fb80b724dc 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -118,7 +118,7 @@ out: static void batadv_orig_node_free_rcu(struct rcu_head *rcu) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node, *tmp_neigh_node; struct batadv_orig_node *orig_node; @@ -134,7 +134,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) } /* for all neighbors towards this originator ... */ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); @@ -161,7 +161,7 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) void batadv_originator_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -179,9 +179,9 @@ void batadv_originator_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); } spin_unlock_bh(list_lock); @@ -274,7 +274,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node **best_neigh_node) { - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; bool neigh_purged = false; unsigned long last_seen; @@ -285,7 +285,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, spin_lock_bh(&orig_node->neigh_list_lock); /* for all neighbors towards this originator ... */ - hlist_for_each_entry_safe(neigh_node, node, node_tmp, + hlist_for_each_entry_safe(neigh_node, node_tmp, &orig_node->neigh_list, list) { last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; @@ -348,7 +348,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, static void _batadv_purge_orig(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* spinlock to protect write access */ struct batadv_orig_node *orig_node; @@ -363,13 +363,13 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(orig_node, node, node_tmp, + hlist_for_each_entry_safe(orig_node, node_tmp, head, hash_entry) { if (batadv_purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) batadv_gw_node_delete(bat_priv, orig_node); - hlist_del_rcu(node); + hlist_del_rcu(&orig_node->hash_entry); batadv_orig_node_free_ref(orig_node); continue; } @@ -408,7 +408,6 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node, *node_tmp; struct hlist_head *head; struct batadv_hard_iface *primary_if; struct batadv_orig_node *orig_node; @@ -434,7 +433,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { neigh_node = batadv_orig_node_get_router(orig_node); if (!neigh_node) continue; @@ -453,7 +452,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node_tmp, node_tmp, + hlist_for_each_entry_rcu(neigh_node_tmp, &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node_tmp->addr, @@ -511,7 +510,6 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; uint32_t i; @@ -524,7 +522,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_add_if(orig_node, max_if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); @@ -595,7 +593,6 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; @@ -609,7 +606,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = batadv_orig_node_del_if(orig_node, max_if_num, hard_iface->if_num); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 286bf743e76a..7df48fa7669d 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -68,7 +68,6 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_orig_node *orig_node, *orig_node_tmp = NULL; int index; @@ -79,7 +78,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (!batadv_compare_eth(orig_node, data)) continue; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 60ba03fc8390..5ee21cebbbb0 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -37,7 +37,6 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; unsigned long *word; @@ -49,7 +48,7 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); word_index = hard_iface->if_num * BATADV_NUM_WORDS; word = &(orig_node->bcast_own[word_index]); @@ -146,7 +145,6 @@ out: void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) { - struct hlist_node *node; struct batadv_neigh_node *tmp_neigh_node, *router = NULL; uint8_t interference_candidate = 0; @@ -169,7 +167,7 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node, * interface. If we do, we won't select this candidate because of * possible interference. */ - hlist_for_each_entry_rcu(tmp_neigh_node, node, + hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { if (tmp_neigh_node == neigh_node) continue; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 80ca65fc89a1..a67cffde37ae 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -316,7 +316,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) { struct batadv_forw_packet *forw_packet; - struct hlist_node *tmp_node, *safe_tmp_node; + struct hlist_node *safe_tmp_node; bool pending; if (hard_iface) @@ -329,7 +329,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free bcast list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bcast_list, list) { /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface @@ -355,7 +355,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, /* free batman packet list */ spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node, + hlist_for_each_entry_safe(forw_packet, safe_tmp_node, &bat_priv->forw_bat_list, list) { /* if purge_outstanding_packets() was called with an argument * we delete only packets belonging to the given interface diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d44672f4a349..98a66a021a60 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -56,7 +56,6 @@ static struct batadv_tt_common_entry * batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_common_entry *tt_common_entry_tmp = NULL; uint32_t index; @@ -68,7 +67,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (!batadv_compare_eth(tt_common_entry, data)) continue; @@ -257,7 +256,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; int hash_added; bool roamed_back = false; @@ -339,7 +337,7 @@ check_roaming: /* These node are probably going to update their tt table */ head = &tt_global->orig_list; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { batadv_send_roam_adv(bat_priv, tt_global->common.addr, orig_entry->orig_node); } @@ -470,7 +468,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int last_seen_secs; @@ -494,7 +491,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -605,9 +602,9 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_common_entry *tt_common_entry; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, @@ -651,7 +648,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -665,9 +662,9 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); @@ -724,11 +721,10 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, { struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; - struct hlist_node *node; rcu_read_lock(); head = &entry->orig_list; - hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) { + hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) @@ -940,12 +936,11 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router = NULL; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; int best_tq = 0; head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { router = batadv_orig_node_get_router(orig_entry->orig_node); if (!router) continue; @@ -973,7 +968,6 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, struct seq_file *seq) { struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry, *best_entry; struct batadv_tt_common_entry *tt_common_entry; uint16_t flags; @@ -997,7 +991,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry, head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (best_entry == orig_entry) continue; @@ -1020,7 +1014,6 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct batadv_hard_iface *primary_if; - struct hlist_node *node; struct hlist_head *head; uint32_t i; @@ -1039,7 +1032,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1059,13 +1052,13 @@ static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { - hlist_del_rcu(node); + hlist_for_each_entry_safe(orig_entry, safe, head, list) { + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } spin_unlock_bh(&tt_global_entry->list_lock); @@ -1078,18 +1071,18 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv, const char *message) { struct hlist_head *head; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; - hlist_for_each_entry_safe(orig_entry, node, safe, head, list) { + hlist_for_each_entry_safe(orig_entry, safe, head, list) { if (orig_entry->orig_node == orig_node) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting %pM from global tt entry %pM: %s\n", orig_node->orig, tt_global_entry->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_free_ref(orig_entry); } } @@ -1108,7 +1101,6 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, { bool last_entry = true; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_orig_list_entry *orig_entry; /* no local entry exists, case 1: @@ -1117,7 +1109,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, rcu_read_lock(); head = &tt_global_entry->orig_list; - hlist_for_each_entry_rcu(orig_entry, node, head, list) { + hlist_for_each_entry_rcu(orig_entry, head, list) { if (orig_entry->orig_node != orig_node) { last_entry = false; break; @@ -1202,7 +1194,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_common_entry *tt_common_entry; uint32_t i; struct batadv_hashtable *hash = bat_priv->tt.global_hash; - struct hlist_node *node, *safe; + struct hlist_node *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -1214,7 +1206,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, safe, + hlist_for_each_entry_safe(tt_common_entry, safe, head, hash_entry) { tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, @@ -1227,7 +1219,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM: %s\n", tt_global->common.addr, message); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } } @@ -1262,7 +1254,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; char *msg = NULL; @@ -1274,7 +1266,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, @@ -1287,7 +1279,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) "Deleting global tt entry (%pM): %s\n", tt_global->common.addr, msg); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); batadv_tt_global_entry_free_ref(tt_global); } @@ -1301,7 +1293,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1315,9 +1307,9 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, + hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { - hlist_del_rcu(node); + hlist_del_rcu(&tt_common_entry->hash_entry); tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); @@ -1397,7 +1389,6 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1406,7 +1397,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); @@ -1449,7 +1440,6 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) uint16_t total = 0, total_one; struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; - struct hlist_node *node; struct hlist_head *head; uint32_t i; int j; @@ -1458,7 +1448,7 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) { + hlist_for_each_entry_rcu(tt_common, head, hash_entry) { /* not yet committed clients have not to be taken into * account while computing the CRC */ @@ -1597,7 +1587,6 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_query_packet *tt_response; struct batadv_tt_change *tt_change; - struct hlist_node *node; struct hlist_head *head; struct sk_buff *skb = NULL; uint16_t tt_tot, tt_count; @@ -1627,7 +1616,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (tt_count == tt_tot) break; @@ -2307,7 +2296,6 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, uint32_t i; uint16_t changed_num = 0; struct hlist_head *head; - struct hlist_node *node; struct batadv_tt_common_entry *tt_common_entry; if (!hash) @@ -2317,7 +2305,7 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (enable) { if ((tt_common_entry->flags & flags) == flags) @@ -2342,7 +2330,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ uint32_t i; @@ -2355,7 +2343,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(tt_common, node, node_tmp, head, + hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { if (!(tt_common->flags & BATADV_TT_CLIENT_PENDING)) continue; @@ -2365,7 +2353,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_common->addr); atomic_dec(&bat_priv->tt.local_entry_num); - hlist_del_rcu(node); + hlist_del_rcu(&tt_common->hash_entry); tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 22d2785177d1..c053244b97bd 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -97,7 +97,6 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) { struct batadv_hashtable *hash = bat_priv->vis.hash; struct hlist_head *head; - struct hlist_node *node; struct batadv_vis_info *vis_info, *vis_info_tmp = NULL; uint32_t index; @@ -108,8 +107,8 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data) head = &hash->table[index]; rcu_read_lock(); - hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) { - if (!batadv_vis_info_cmp(node, data)) + hlist_for_each_entry_rcu(vis_info, head, hash_entry) { + if (!batadv_vis_info_cmp(&vis_info->hash_entry, data)) continue; vis_info_tmp = vis_info; @@ -128,9 +127,8 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface, bool primary) { struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (batadv_compare_eth(entry->addr, interface)) return; } @@ -148,9 +146,8 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq, const struct hlist_head *if_list) { struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos; - hlist_for_each_entry(entry, pos, if_list, list) { + hlist_for_each_entry(entry, if_list, list) { if (entry->primary) seq_printf(seq, "PRIMARY, "); else @@ -198,9 +195,8 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, { int i; struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos; - hlist_for_each_entry(entry, pos, list, list) { + hlist_for_each_entry(entry, list, list) { seq_printf(seq, "%pM,", entry->addr); for (i = 0; i < packet->entries; i++) @@ -218,17 +214,16 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, const struct hlist_head *head) { - struct hlist_node *node; struct batadv_vis_info *info; struct batadv_vis_packet *packet; uint8_t *entries_pos; struct batadv_vis_info_entry *entries; struct batadv_vis_if_list_entry *entry; - struct hlist_node *pos, *n; + struct hlist_node *n; HLIST_HEAD(vis_if_list); - hlist_for_each_entry_rcu(info, node, head, hash_entry) { + hlist_for_each_entry_rcu(info, head, hash_entry) { packet = (struct batadv_vis_packet *)info->skb_packet->data; entries_pos = (uint8_t *)packet + sizeof(*packet); entries = (struct batadv_vis_info_entry *)entries_pos; @@ -240,7 +235,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq, batadv_vis_data_read_entries(seq, &vis_if_list, packet, entries); - hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, list) { + hlist_for_each_entry_safe(entry, n, &vis_if_list, list) { hlist_del(&entry->list); kfree(entry); } @@ -519,7 +514,6 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, { struct batadv_hashtable *hash = bat_priv->orig_hash; struct batadv_neigh_node *router; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -532,7 +526,7 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -571,7 +565,6 @@ static bool batadv_vis_packet_full(const struct batadv_vis_info *info) static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_neigh_node *router; @@ -605,7 +598,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { router = batadv_orig_node_get_router(orig_node); if (!router) continue; @@ -644,7 +637,7 @@ next: head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(tt_common_entry, node, head, + hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { packet_pos = skb_put(info->skb_packet, sizeof(*entry)); entry = (struct batadv_vis_info_entry *)packet_pos; @@ -673,14 +666,14 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) { uint32_t i; struct batadv_hashtable *hash = bat_priv->vis.hash; - struct hlist_node *node, *node_tmp; + struct hlist_node *node_tmp; struct hlist_head *head; struct batadv_vis_info *info; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(info, node, node_tmp, + hlist_for_each_entry_safe(info, node_tmp, head, hash_entry) { /* never purge own data. */ if (info == bat_priv->vis.my_info) @@ -688,7 +681,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv) if (batadv_has_timed_out(info->first_seen, BATADV_VIS_TIMEOUT)) { - hlist_del(node); + hlist_del(&info->hash_entry); batadv_send_list_del(info); kref_put(&info->refcount, batadv_free_info); } @@ -700,7 +693,6 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_vis_info *info) { struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_node *node; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; @@ -715,7 +707,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { /* if it's a vis server and reachable, send it. */ if (!(orig_node->flags & BATADV_VIS_SERVER)) continue; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 07f073935811..6a93614f2c49 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -70,14 +70,13 @@ static struct bt_sock_list hci_sk_list = { void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct hci_filter *flt; struct sk_buff *nskb; @@ -142,13 +141,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; /* Skip the original socket */ @@ -176,7 +174,6 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk) void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; struct sk_buff *skb_copy = NULL; __le16 opcode; @@ -210,7 +207,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -251,13 +248,12 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) static void send_monitor_event(struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; BT_DBG("len %d", skb->len); read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; if (sk->sk_state != BT_BOUND) @@ -393,11 +389,10 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - struct hlist_node *node; /* Detach sockets from device */ read_lock(&hci_sk_list.lock); - sk_for_each(sk, node, &hci_sk_list.head) { + sk_for_each(sk, &hci_sk_list.head) { bh_lock_sock_nested(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ce3f6658f4b2..c23bae86263b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -107,15 +107,14 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk = NULL; - struct hlist_node *node; - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel == channel && !bacmp(&bt_sk(sk)->src, src)) break; } - return node ? sk : NULL; + return sk ? sk : NULL; } /* Find socket with channel and source bdaddr. @@ -124,11 +123,10 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -145,7 +143,7 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t * read_unlock(&rfcomm_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) @@ -970,11 +968,10 @@ done: static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&rfcomm_sk_list.lock); - sk_for_each(sk, node, &rfcomm_sk_list.head) { + sk_for_each(sk, &rfcomm_sk_list.head) { seq_printf(f, "%pMR %pMR %d %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state, rfcomm_pi(sk)->channel); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b5178d62064e..79d87d8d4f51 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -259,10 +259,9 @@ drop: /* -------- Socket interface ---------- */ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) { - struct hlist_node *node; struct sock *sk; - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -279,11 +278,10 @@ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba) static struct sock *sco_get_sock_listen(bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -298,7 +296,7 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src) read_unlock(&sco_sk_list.lock); - return node ? sk : sk1; + return sk ? sk : sk1; } static void sco_sock_destruct(struct sock *sk) @@ -951,14 +949,13 @@ static void sco_conn_ready(struct sco_conn *conn) int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) { struct sock *sk; - struct hlist_node *node; int lm = 0; BT_DBG("hdev %s, bdaddr %pMR", hdev->name, bdaddr); /* Find listening sockets */ read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -1018,11 +1015,10 @@ drop: static int sco_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; - struct hlist_node *node; read_lock(&sco_sk_list.lock); - sk_for_each(sk, node, &sco_sk_list.head) { + sk_for_each(sk, &sco_sk_list.head) { seq_printf(f, "%pMR %pMR %d\n", &bt_sk(sk)->src, &bt_sk(sk)->dst, sk->sk_state); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 8117900af4de..b0812c91c0f0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -181,9 +181,9 @@ void br_fdb_cleanup(unsigned long _data) spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; if (f->is_static) continue; @@ -207,8 +207,8 @@ void br_fdb_flush(struct net_bridge *br) spin_lock_bh(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + struct hlist_node *n; + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { if (!f->is_static) fdb_delete(br, f); } @@ -266,10 +266,9 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr, __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, + hlist_for_each_entry_rcu(fdb, &br->hash[br_mac_hash(addr, vid)], hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) { @@ -315,14 +314,13 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, { struct __fdb_entry *fe = buf; int i, num = 0; - struct hlist_node *h; struct net_bridge_fdb_entry *f; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (num >= maxnum) goto out; @@ -363,10 +361,9 @@ static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, const unsigned char *addr, __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry(fdb, h, head, hlist) { + hlist_for_each_entry(fdb, head, hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) return fdb; @@ -378,10 +375,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, const unsigned char *addr, __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, head, hlist) { + hlist_for_each_entry_rcu(fdb, head, hlist) { if (ether_addr_equal(fdb->addr.addr, addr) && fdb->vlan_id == vid) return fdb; @@ -593,10 +589,9 @@ int br_fdb_dump(struct sk_buff *skb, goto out; for (i = 0; i < BR_HASH_SIZE; i++) { - struct hlist_node *h; struct net_bridge_fdb_entry *f; - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (idx < cb->args[0]) goto skip; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 38991e03646d..9f97b850fc65 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -18,7 +18,6 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; - struct hlist_node *n; struct nlattr *nest; if (!br->multicast_router || hlist_empty(&br->router_list)) @@ -28,7 +27,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (nest == NULL) return -EMSGSIZE; - hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + hlist_for_each_entry_rcu(p, &br->router_list, rlist) { if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) goto fail; } @@ -61,12 +60,11 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, return -EMSGSIZE; for (i = 0; i < mdb->max; i++) { - struct hlist_node *h; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p, **pp; struct net_bridge_port *port; - hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { if (idx < s_idx) goto skip; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7d886b0a8b7b..10e6fce1bb62 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -86,9 +86,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; - hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) { if (br_ip_equal(&mp->addr, dst)) return mp; } @@ -178,13 +177,12 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, int elasticity) { struct net_bridge_mdb_entry *mp; - struct hlist_node *p; int maxlen; int len; int i; for (i = 0; i < old->max; i++) - hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) + hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], &new->mhash[br_ip_hash(new, &mp->addr)]); @@ -194,7 +192,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, maxlen = 0; for (i = 0; i < new->max; i++) { len = 0; - hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) + hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver]) len++; if (len > maxlen) maxlen = len; @@ -510,14 +508,13 @@ static struct net_bridge_mdb_entry *br_multicast_get_group( { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p; unsigned int count = 0; unsigned int max; int elasticity; int err; mdb = rcu_dereference_protected(br->mdb, 1); - hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { + hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) return mp; @@ -882,10 +879,10 @@ void br_multicast_disable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - struct hlist_node *p, *n; + struct hlist_node *n; spin_lock(&br->multicast_lock); - hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_del_pg(br, pg); if (!hlist_unhashed(&port->rlist)) @@ -1025,12 +1022,12 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { struct net_bridge_port *p; - struct hlist_node *n, *slot = NULL; + struct hlist_node *slot = NULL; - hlist_for_each_entry(p, n, &br->router_list, rlist) { + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; - slot = n; + slot = &p->rlist; } if (slot) @@ -1653,7 +1650,7 @@ void br_multicast_stop(struct net_bridge *br) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; - struct hlist_node *p, *n; + struct hlist_node *n; u32 ver; int i; @@ -1670,7 +1667,7 @@ void br_multicast_stop(struct net_bridge *br) ver = mdb->ver; for (i = 0; i < mdb->max; i++) { - hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); diff --git a/net/can/af_can.c b/net/can/af_can.c index ddac1ee2ed20..c48e5220bbac 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -516,7 +516,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, { struct receiver *r = NULL; struct hlist_head *rl; - struct hlist_node *next; struct dev_rcv_lists *d; if (dev && dev->type != ARPHRD_CAN) @@ -540,7 +539,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * been registered before. */ - hlist_for_each_entry_rcu(r, next, rl, list) { + hlist_for_each_entry_rcu(r, rl, list) { if (r->can_id == can_id && r->mask == mask && r->func == func && r->data == data) break; @@ -552,7 +551,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, * will be NULL, while r will point to the last item of the list. */ - if (!next) { + if (!r) { printk(KERN_ERR "BUG: receive list entry not found for " "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); @@ -590,7 +589,6 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r) static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) { struct receiver *r; - struct hlist_node *n; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; @@ -600,7 +598,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { if (can_id & r->mask) { deliver(skb, r); matches++; @@ -610,13 +608,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { deliver(skb, r); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { if ((can_id & r->mask) == r->can_id) { deliver(skb, r); matches++; @@ -624,7 +622,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { if ((can_id & r->mask) != r->can_id) { deliver(skb, r); matches++; @@ -636,7 +634,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) { + hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) { if (r->can_id == can_id) { deliver(skb, r); matches++; @@ -644,7 +642,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) } } else { can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) { + hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { deliver(skb, r); matches++; } diff --git a/net/can/gw.c b/net/can/gw.c index c185fcd5e828..2d117dc5ebea 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -457,11 +457,11 @@ static int cgw_notifier(struct notifier_block *nb, if (msg == NETDEV_UNREGISTER) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); @@ -575,12 +575,11 @@ cancel: static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { struct cgw_job *gwj = NULL; - struct hlist_node *n; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); - hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) { + hlist_for_each_entry_rcu(gwj, &cgw_list, list) { if (idx < s_idx) goto cont; @@ -858,11 +857,11 @@ out: static void cgw_remove_all_jobs(void) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { hlist_del(&gwj->list); cgw_unregister_filter(gwj); kfree(gwj); @@ -872,7 +871,7 @@ static void cgw_remove_all_jobs(void) static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct cgw_job *gwj = NULL; - struct hlist_node *n, *nx; + struct hlist_node *nx; struct rtcanmsg *r; struct cf_mod mod; struct can_can_gw ccgw; @@ -907,7 +906,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ASSERT_RTNL(); /* remove only the first matching entry */ - hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { if (gwj->flags != r->flags) continue; diff --git a/net/can/proc.c b/net/can/proc.c index 497335892146..1ab8c888f102 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -195,9 +195,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list, struct net_device *dev) { struct receiver *r; - struct hlist_node *n; - hlist_for_each_entry_rcu(r, n, rx_list, list) { + hlist_for_each_entry_rcu(r, rx_list, list) { char *fmt = (r->can_id & CAN_EFF_FLAG)? " %-5s %08x %08x %pK %pK %8ld %s\n" : " %-5s %03x %08x %pK %pK %8ld %s\n"; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 1deb29af82fd..e65e6e4be38b 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -28,6 +28,22 @@ #include "crypto.h" +/* + * Module compatibility interface. For now it doesn't do anything, + * but its existence signals a certain level of functionality. + * + * The data buffer is used to pass information both to and from + * libceph. The return value indicates whether libceph determines + * it is compatible with the caller (from another kernel module), + * given the provided data. + * + * The data pointer can be null. + */ +bool libceph_compatible(void *data) +{ + return true; +} +EXPORT_SYMBOL(libceph_compatible); /* * find filename portion of a path (/foo/bar/baz -> baz) @@ -590,10 +606,8 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out_crypto; - pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", - CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, - CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, - CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); + pr_info("loaded (mon/osd proto %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04de29c..1348df96fe15 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op) switch (op) { case CEPH_OSD_OP_READ: return "read"; case CEPH_OSD_OP_STAT: return "stat"; + case CEPH_OSD_OP_MAPEXT: return "mapext"; + case CEPH_OSD_OP_SPARSE_READ: return "sparse-read"; + case CEPH_OSD_OP_NOTIFY: return "notify"; + case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack"; + case CEPH_OSD_OP_ASSERT_VER: return "assert-version"; case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; + case CEPH_OSD_OP_CREATE: return "create"; case CEPH_OSD_OP_WRITE: return "write"; case CEPH_OSD_OP_DELETE: return "delete"; case CEPH_OSD_OP_TRUNCATE: return "truncate"; @@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_TMAPUP: return "tmapup"; case CEPH_OSD_OP_TMAPGET: return "tmapget"; case CEPH_OSD_OP_TMAPPUT: return "tmapput"; + case CEPH_OSD_OP_WATCH: return "watch"; + + case CEPH_OSD_OP_CLONERANGE: return "clonerange"; + case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; + case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr"; case CEPH_OSD_OP_GETXATTR: return "getxattr"; case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; @@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; case CEPH_OSD_OP_SCRUB: return "scrub"; + case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve"; + case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve"; + case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop"; + case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map"; case CEPH_OSD_OP_WRLOCK: return "wrlock"; case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; @@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_CALL: return "call"; case CEPH_OSD_OP_PGLS: return "pgls"; + case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter"; + case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys"; + case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals"; + case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header"; + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys"; + case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals"; + case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header"; + case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear"; + case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys"; } return "???"; } +const char *ceph_osd_state_name(int s) +{ + switch (s) { + case CEPH_OSD_EXISTS: + return "exists"; + case CEPH_OSD_UP: + return "up"; + case CEPH_OSD_AUTOOUT: + return "autoout"; + case CEPH_OSD_NEW: + return "new"; + default: + return "???"; + } +} const char *ceph_pool_op_name(int op) { diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce755ce10..cbd06a91941c 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in * @outpos: our position in that vector * @firstn: true if choosing "first n" items, false if choosing "indep" * @recurse_to_leaf: true if we want one device under each item of given type + * @descend_once: true if we should only try one descent before giving up * @out2: second output vector for leaf items (if @recurse_to_leaf) */ static int crush_choose(const struct crush_map *map, @@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, int x, int numrep, int type, int *out, int outpos, int firstn, int recurse_to_leaf, - int *out2) + int descend_once, int *out2) { int rep; unsigned int ftotal, flocal; @@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map, } reject = 0; - if (recurse_to_leaf) { + if (!collide && recurse_to_leaf) { if (item < 0) { if (crush_choose(map, map->buckets[-1-item], @@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, x, outpos+1, 0, out2, outpos, firstn, 0, + map->chooseleaf_descend_once, NULL) <= outpos) /* didn't get leaf */ reject = 1; @@ -422,7 +424,10 @@ reject: ftotal++; flocal++; - if (collide && flocal <= map->choose_local_tries) + if (reject && descend_once) + /* let outer call try again */ + skip_rep = 1; + else if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; else if (map->choose_local_fallback_tries > 0 && @@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, int i, j; int numrep; int firstn; + const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, curstep->arg2, o+osize, j, firstn, - recurse_to_leaf, c+osize); + recurse_to_leaf, + descend_once, c+osize); } if (recurse_to_leaf) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index af14cb425164..6e7a236525b6 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,7 +423,8 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +static int ceph_key_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; size_t datalen = prep->datalen; @@ -458,12 +459,12 @@ err: return ret; } -int ceph_key_match(const struct key *key, const void *description) +static int ceph_key_match(const struct key *key, const void *description) { return strcmp(key->description, description) == 0; } -void ceph_key_destroy(struct key *key) { +static void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 38b5dc1823d4..00d051f4894e 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p) for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pool = rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", - pool->id, pool->v.pg_num, pool->pg_num_mask, - pool->v.lpg_num, pool->lpg_num_mask); + seq_printf(s, "pg_pool %llu pg_num %d / %d\n", + (unsigned long long)pool->id, pool->pg_num, + pool->pg_num_mask); } for (i = 0; i < client->osdc.osdmap->max_osd; i++) { struct ceph_entity_addr *addr = @@ -123,26 +123,16 @@ static int osdc_show(struct seq_file *s, void *pp) mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { struct ceph_osd_request *req; - struct ceph_osd_request_head *head; - struct ceph_osd_op *op; - int num_ops; - int opcode, olen; + int opcode; int i; req = rb_entry(p, struct ceph_osd_request, r_node); - seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid, req->r_osd ? req->r_osd->o_osd : -1, - le32_to_cpu(req->r_pgid.pool), - le16_to_cpu(req->r_pgid.ps)); + req->r_pgid.pool, req->r_pgid.seed); - head = req->r_request->front.iov_base; - op = (void *)(head + 1); - - num_ops = le16_to_cpu(head->num_ops); - olen = le32_to_cpu(head->object_len); - seq_printf(s, "%.*s", olen, - (const char *)(head->ops + num_ops)); + seq_printf(s, "%.*s", req->r_oid_len, req->r_oid); if (req->r_reassert_version.epoch) seq_printf(s, "\t%u'%llu", @@ -151,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp) else seq_printf(s, "\t"); - for (i = 0; i < num_ops; i++) { - opcode = le16_to_cpu(op->op); + for (i = 0; i < req->r_num_ops; i++) { + opcode = le16_to_cpu(req->r_request_ops[i].op); seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); - op++; } seq_printf(s, "\n"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5ccf87ed8d68..2c0669fb54e3 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -9,8 +9,9 @@ #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> +#ifdef CONFIG_BLOCK #include <linux/bio.h> -#include <linux/blkdev.h> +#endif /* CONFIG_BLOCK */ #include <linux/dns_resolver.h> #include <net/tcp.h> @@ -97,6 +98,57 @@ #define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ #define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ +static bool con_flag_valid(unsigned long con_flag) +{ + switch (con_flag) { + case CON_FLAG_LOSSYTX: + case CON_FLAG_KEEPALIVE_PENDING: + case CON_FLAG_WRITE_PENDING: + case CON_FLAG_SOCK_CLOSED: + case CON_FLAG_BACKOFF: + return true; + default: + return false; + } +} + +static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + clear_bit(con_flag, &con->flags); +} + +static void con_flag_set(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + set_bit(con_flag, &con->flags); +} + +static bool con_flag_test(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_clear(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_clear_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_set(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_set_bit(con_flag, &con->flags); +} + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -114,7 +166,7 @@ static struct lock_class_key socket_class; static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); -static void ceph_fault(struct ceph_connection *con); +static void con_fault(struct ceph_connection *con); /* * Nicely render a sockaddr as a string. An array of formatted @@ -171,7 +223,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ static struct workqueue_struct *ceph_msgr_wq; -void _ceph_msgr_exit(void) +static void _ceph_msgr_exit(void) { if (ceph_msgr_wq) { destroy_workqueue(ceph_msgr_wq); @@ -308,7 +360,7 @@ static void ceph_sock_write_space(struct sock *sk) * buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). */ - if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { + if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { dout("%s %p queueing write work\n", __func__, con); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -333,7 +385,7 @@ static void ceph_sock_state_change(struct sock *sk) case TCP_CLOSE_WAIT: dout("%s TCP_CLOSE_WAIT\n", __func__); con_sock_state_closing(con); - set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_set(con, CON_FLAG_SOCK_CLOSED); queue_con(con); break; case TCP_ESTABLISHED: @@ -474,7 +526,7 @@ static int con_close_socket(struct ceph_connection *con) * received a socket close event before we had the chance to * shut the socket down. */ - clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_clear(con, CON_FLAG_SOCK_CLOSED); con_sock_state_closed(con); return rc; @@ -538,11 +590,10 @@ void ceph_con_close(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr)); con->state = CON_STATE_CLOSED; - clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ + con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); + con_flag_clear(con, CON_FLAG_BACKOFF); reset_connection(con); con->peer_global_seq = 0; @@ -798,7 +849,7 @@ static void prepare_write_message(struct ceph_connection *con) /* no, queue up footer too and be done */ prepare_write_message_footer(con); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -819,7 +870,7 @@ static void prepare_write_ack(struct ceph_connection *con) &con->out_temp_ack); con->out_more = 1; /* more will follow.. eventually.. */ - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -830,7 +881,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -873,7 +924,7 @@ static void prepare_write_banner(struct ceph_connection *con) &con->msgr->my_enc_addr); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) @@ -923,7 +974,7 @@ static int prepare_write_connect(struct ceph_connection *con) auth->authorizer_buf); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); return 0; } @@ -1643,7 +1694,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.connect_seq)); if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) - set_bit(CON_FLAG_LOSSYTX, &con->flags); + con_flag_set(con, CON_FLAG_LOSSYTX); con->delay = 0; /* reset backoff memory */ @@ -2080,15 +2131,14 @@ do_next: prepare_write_ack(con); goto more; } - if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, - &con->flags)) { + if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } } /* Nothing to do! */ - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: @@ -2268,7 +2318,7 @@ static void queue_con(struct ceph_connection *con) static bool con_sock_closed(struct ceph_connection *con) { - if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) return false; #define CASE(x) \ @@ -2295,6 +2345,41 @@ static bool con_sock_closed(struct ceph_connection *con) return true; } +static bool con_backoff(struct ceph_connection *con) +{ + int ret; + + if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) + return false; + + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { + dout("%s: con %p FAILED to back off %lu\n", __func__, + con, con->delay); + BUG_ON(ret == -ENOENT); + con_flag_set(con, CON_FLAG_BACKOFF); + } + + return true; +} + +/* Finish fault handling; con->mutex must *not* be held here */ + +static void con_fault_finish(struct ceph_connection *con) +{ + /* + * in case we faulted due to authentication, invalidate our + * current tickets so that we can get new ones. + */ + if (con->auth_retry && con->ops->invalidate_authorizer) { + dout("calling invalidate_authorizer()\n"); + con->ops->invalidate_authorizer(con); + } + + if (con->ops->fault) + con->ops->fault(con); +} + /* * Do some work on a connection. Drop a connection ref when we're done. */ @@ -2302,73 +2387,68 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - int ret; + bool fault; mutex_lock(&con->mutex); -restart: - if (con_sock_closed(con)) - goto fault; + while (true) { + int ret; - if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { - dout("con_work %p backing off\n", con); - ret = queue_con_delay(con, round_jiffies_relative(con->delay)); - if (ret) { - dout("con_work %p FAILED to back off %lu\n", con, - con->delay); - BUG_ON(ret == -ENOENT); - set_bit(CON_FLAG_BACKOFF, &con->flags); + if ((fault = con_sock_closed(con))) { + dout("%s: con %p SOCK_CLOSED\n", __func__, con); + break; + } + if (con_backoff(con)) { + dout("%s: con %p BACKOFF\n", __func__, con); + break; + } + if (con->state == CON_STATE_STANDBY) { + dout("%s: con %p STANDBY\n", __func__, con); + break; + } + if (con->state == CON_STATE_CLOSED) { + dout("%s: con %p CLOSED\n", __func__, con); + BUG_ON(con->sock); + break; + } + if (con->state == CON_STATE_PREOPEN) { + dout("%s: con %p PREOPEN\n", __func__, con); + BUG_ON(con->sock); } - goto done; - } - if (con->state == CON_STATE_STANDBY) { - dout("con_work %p STANDBY\n", con); - goto done; - } - if (con->state == CON_STATE_CLOSED) { - dout("con_work %p CLOSED\n", con); - BUG_ON(con->sock); - goto done; - } - if (con->state == CON_STATE_PREOPEN) { - dout("con_work OPENING\n"); - BUG_ON(con->sock); - } + ret = try_read(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on read"; + fault = true; + break; + } - ret = try_read(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on read"; - goto fault; - } + ret = try_write(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on write"; + fault = true; + } - ret = try_write(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on write"; - goto fault; + break; /* If we make it to here, we're done */ } - -done: + if (fault) + con_fault(con); mutex_unlock(&con->mutex); -done_unlocked: - con->ops->put(con); - return; -fault: - ceph_fault(con); /* error/fault path */ - goto done_unlocked; -} + if (fault) + con_fault_finish(con); + con->ops->put(con); +} /* * Generic error/fault handler. A retry mechanism is used with * exponential backoff */ -static void ceph_fault(struct ceph_connection *con) - __releases(con->mutex) +static void con_fault(struct ceph_connection *con) { pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); @@ -2381,10 +2461,10 @@ static void ceph_fault(struct ceph_connection *con) con_close_socket(con); - if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { + if (con_flag_test(con, CON_FLAG_LOSSYTX)) { dout("fault on LOSSYTX channel, marking CLOSED\n"); con->state = CON_STATE_CLOSED; - goto out_unlock; + return; } if (con->in_msg) { @@ -2401,9 +2481,9 @@ static void ceph_fault(struct ceph_connection *con) /* If there are no messages queued or keepalive pending, place * the connection in a STANDBY state */ if (list_empty(&con->out_queue) && - !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { + !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) { dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); con->state = CON_STATE_STANDBY; } else { /* retry after a delay. */ @@ -2412,23 +2492,9 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - set_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_set(con, CON_FLAG_BACKOFF); queue_con(con); } - -out_unlock: - mutex_unlock(&con->mutex); - /* - * in case we faulted due to authentication, invalidate our - * current tickets so that we can get new ones. - */ - if (con->auth_retry && con->ops->invalidate_authorizer) { - dout("calling invalidate_authorizer()\n"); - con->ops->invalidate_authorizer(con); - } - - if (con->ops->fault) - con->ops->fault(con); } @@ -2469,8 +2535,8 @@ static void clear_standby(struct ceph_connection *con) dout("clear_standby %p and ++connect_seq\n", con); con->state = CON_STATE_PREOPEN; con->connect_seq++; - WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); - WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); + WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING)); + WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)); } } @@ -2511,7 +2577,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ - if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_send); @@ -2600,8 +2666,8 @@ void ceph_con_keepalive(struct ceph_connection *con) mutex_lock(&con->mutex); clear_standby(con); mutex_unlock(&con->mutex); - if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && - test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && + con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); @@ -2651,9 +2717,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, m->page_alignment = 0; m->pages = NULL; m->pagelist = NULL; +#ifdef CONFIG_BLOCK m->bio = NULL; m->bio_iter = NULL; m->bio_seg = 0; +#endif /* CONFIG_BLOCK */ m->trail = NULL; /* front */ diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 812eb3b46c1f..aef5b1062bee 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -697,7 +697,7 @@ int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) { return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, - pool, snapid, 0, 0); + pool, snapid, NULL, 0); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index eb9a44478764..d730dd4d8eb2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -23,7 +23,7 @@ static const struct ceph_connection_operations osd_con_ops; -static void send_queued(struct ceph_osd_client *osdc); +static void __send_queued(struct ceph_osd_client *osdc); static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - case CEPH_OSD_OP_NOTIFY: - return 1; - default: - return 0; - } -} - static int op_has_extent(int op) { return (op == CEPH_OSD_OP_READ || op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - u64 orig_len = *plen; - u64 objoff, objlen; /* extent in object */ - int r; - - reqhead->snapid = cpu_to_le64(snapid); - - /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); - if (r < 0) - return r; - if (*plen < orig_len) - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - - if (op_has_extent(op->op)) { - op->extent.offset = objoff; - op->extent.length = objlen; - } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - return 0; -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static int calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, +static int calc_layout(struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - u64 bno; + u64 orig_len = *plen; + u64 bno = 0; + u64 objoff = 0; + u64 objlen = 0; int r; - r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + /* object extent? */ + r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, + &objoff, &objlen); if (r < 0) return r; + if (objlen < orig_len) { + *plen = objlen; + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); + } + + if (op_has_extent(op->op)) { + u32 osize = le32_to_cpu(layout->fl_object_size); + op->extent.offset = objoff; + op->extent.length = objlen; + if (op->extent.truncate_size <= off - objoff) { + op->extent.truncate_size = 0; + } else { + op->extent.truncate_size -= off - objoff; + if (op->extent.truncate_size > osize) + op->extent.truncate_size = osize; + } + } + req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; + if (op->op == CEPH_OSD_OP_WRITE) + op->payload_len = *plen; + + dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", + bno, objoff, objlen, req->r_num_pages); snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); @@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_request) ceph_msg_put(req->r_request); if (req->r_con_filling_msg) { - dout("%s revoking pages %p from con %p\n", __func__, - req->r_pages, req->r_con_filling_msg); + dout("%s revoking msg %p from con %p\n", __func__, + req->r_reply, req->r_con_filling_msg); ceph_msg_revoke_incoming(req->r_reply); req->r_con_filling_msg->ops->put(req->r_con_filling_msg); + req->r_con_filling_msg = NULL; } if (req->r_reply) ceph_msg_put(req->r_reply); if (req->r_own_pages) ceph_release_page_vector(req->r_pages, req->r_num_pages); -#ifdef CONFIG_BLOCK - if (req->r_bio) - bio_put(req->r_bio); -#endif ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); - } + ceph_pagelist_release(&req->r_trail); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else @@ -174,37 +144,25 @@ void ceph_osdc_release_request(struct kref *kref) } EXPORT_SYMBOL(ceph_osdc_release_request); -static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) -{ - int i = 0; - - if (needs_trail) - *needs_trail = 0; - while (ops[i].op) { - if (needs_trail && op_needs_trail(ops[i].op)) - *needs_trail = 1; - i++; - } - - return i; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); - size_t msg_size = sizeof(struct ceph_osd_request_head); - - msg_size += num_op*sizeof(struct ceph_osd_op); + size_t msg_size; + + msg_size = 4 + 4 + 8 + 8 + 4+8; + msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ + msg_size += 1 + 8 + 4 + 4; /* pg_t */ + msg_size += 4 + MAX_OBJ_NAME_SIZE; + msg_size += 2 + num_ops*sizeof(struct ceph_osd_op); + msg_size += 8; /* snapid */ + msg_size += 8; /* snap_seq */ + msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */ + msg_size += 4; if (use_mempool) { req = mempool_alloc(osdc->req_mempool, gfp_flags); @@ -228,10 +186,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); - req->r_flags = flags; - - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -244,20 +198,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } + ceph_pagelist_init(&req->r_trail); /* create request message; allow space for oid */ - msg_size += MAX_OBJ_NAME_SIZE; - if (snapc) - msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else @@ -270,13 +213,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif return req; } @@ -289,6 +225,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->op = cpu_to_le16(src->op); switch (src->op) { + case CEPH_OSD_OP_STAT: + break; case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: dst->extent.offset = @@ -300,52 +238,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->extent.truncate_seq = cpu_to_le32(src->extent.truncate_seq); break; - - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); - dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); - dst->xattr.cmp_op = src->xattr.cmp_op; - dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, - src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, - src->xattr.value_len); - break; case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - ceph_pagelist_append(req->r_trail, src->cls.class_name, + ceph_pagelist_append(&req->r_trail, src->cls.class_name, src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, + ceph_pagelist_append(&req->r_trail, src->cls.method_name, src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, + ceph_pagelist_append(&req->r_trail, src->cls.indata, src->cls.indata_len); break; - case CEPH_OSD_OP_ROLLBACK: - dst->snap.snapid = cpu_to_le64(src->snap.snapid); - break; case CEPH_OSD_OP_STARTSYNC: break; - case CEPH_OSD_OP_NOTIFY: - { - __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); - __le32 timeout = cpu_to_le32(src->watch.timeout); - - BUG_ON(!req->r_trail); - - ceph_pagelist_append(req->r_trail, - &prot_ver, sizeof(prot_ver)); - ceph_pagelist_append(req->r_trail, - &timeout, sizeof(timeout)); - } case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); @@ -356,6 +262,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req, pr_err("unrecognized osd opcode %d\n", dst->op); WARN_ON(1); break; + case CEPH_OSD_OP_MAPEXT: + case CEPH_OSD_OP_MASKTRUNC: + case CEPH_OSD_OP_SPARSE_READ: + case CEPH_OSD_OP_NOTIFY: + case CEPH_OSD_OP_ASSERT_VER: + case CEPH_OSD_OP_WRITEFULL: + case CEPH_OSD_OP_TRUNCATE: + case CEPH_OSD_OP_ZERO: + case CEPH_OSD_OP_DELETE: + case CEPH_OSD_OP_APPEND: + case CEPH_OSD_OP_SETTRUNC: + case CEPH_OSD_OP_TRIMTRUNC: + case CEPH_OSD_OP_TMAPUP: + case CEPH_OSD_OP_TMAPPUT: + case CEPH_OSD_OP_TMAPGET: + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_ROLLBACK: + case CEPH_OSD_OP_OMAPGETKEYS: + case CEPH_OSD_OP_OMAPGETVALS: + case CEPH_OSD_OP_OMAPGETHEADER: + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: + case CEPH_OSD_OP_MODE_RD: + case CEPH_OSD_OP_OMAPSETVALS: + case CEPH_OSD_OP_OMAPSETHEADER: + case CEPH_OSD_OP_OMAPCLEAR: + case CEPH_OSD_OP_OMAPRMKEYS: + case CEPH_OSD_OP_OMAP_CMP: + case CEPH_OSD_OP_CLONERANGE: + case CEPH_OSD_OP_ASSERT_SRC_VERSION: + case CEPH_OSD_OP_SRC_CMPXATTR: + case CEPH_OSD_OP_GETXATTR: + case CEPH_OSD_OP_GETXATTRS: + case CEPH_OSD_OP_CMPXATTR: + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_SETXATTRS: + case CEPH_OSD_OP_RESETXATTRS: + case CEPH_OSD_OP_RMXATTR: + case CEPH_OSD_OP_PULL: + case CEPH_OSD_OP_PUSH: + case CEPH_OSD_OP_BALANCEREADS: + case CEPH_OSD_OP_UNBALANCEREADS: + case CEPH_OSD_OP_SCRUB: + case CEPH_OSD_OP_SCRUB_RESERVE: + case CEPH_OSD_OP_SCRUB_UNRESERVE: + case CEPH_OSD_OP_SCRUB_STOP: + case CEPH_OSD_OP_SCRUB_MAP: + case CEPH_OSD_OP_WRLOCK: + case CEPH_OSD_OP_WRUNLOCK: + case CEPH_OSD_OP_RDLOCK: + case CEPH_OSD_OP_RDUNLOCK: + case CEPH_OSD_OP_UPLOCK: + case CEPH_OSD_OP_DNLOCK: + case CEPH_OSD_OP_PGLS: + case CEPH_OSD_OP_PGLS_FILTER: + pr_err("unsupported osd opcode %s\n", + ceph_osd_op_name(dst->op)); + WARN_ON(1); + break; } dst->payload_len = cpu_to_le32(src->payload_len); } @@ -365,75 +329,95 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, unsigned int num_ops, struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) + struct ceph_snap_context *snapc, u64 snap_id, + struct timespec *mtime) { struct ceph_msg *msg = req->r_request; - struct ceph_osd_request_head *head; struct ceph_osd_req_op *src_op; - struct ceph_osd_op *op; void *p; - int num_op = get_num_ops(src_ops, NULL); - size_t msg_size = sizeof(*head) + num_op*sizeof(*op); + size_t msg_size; int flags = req->r_flags; - u64 data_len = 0; + u64 data_len; int i; - head = msg->front.iov_base; - op = (void *)(head + 1); - p = (void *)(op + num_op); - + req->r_num_ops = num_ops; + req->r_snapid = snap_id; req->r_snapc = ceph_get_snap_context(snapc); - head->client_inc = cpu_to_le32(1); /* always, for now. */ - head->flags = cpu_to_le32(flags); - if (flags & CEPH_OSD_FLAG_WRITE) - ceph_encode_timespec(&head->mtime, mtime); - head->num_ops = cpu_to_le16(num_op); - - - /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; + /* encode request */ + msg->hdr.version = cpu_to_le16(4); + p = msg->front.iov_base; + ceph_encode_32(&p, 1); /* client_inc is always 1 */ + req->r_request_osdmap_epoch = p; + p += 4; + req->r_request_flags = p; + p += 4; + if (req->r_flags & CEPH_OSD_FLAG_WRITE) + ceph_encode_timespec(p, mtime); + p += sizeof(struct ceph_timespec); + req->r_request_reassert_version = p; + p += sizeof(struct ceph_eversion); /* will get filled in */ + + /* oloc */ + ceph_encode_8(&p, 4); + ceph_encode_8(&p, 4); + ceph_encode_32(&p, 8 + 4 + 4); + req->r_request_pool = p; + p += 8; + ceph_encode_32(&p, -1); /* preferred */ + ceph_encode_32(&p, 0); /* key len */ + + ceph_encode_8(&p, 1); + req->r_request_pgid = p; + p += 8 + 4; + ceph_encode_32(&p, -1); /* preferred */ + + /* oid */ + ceph_encode_32(&p, req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); + p += req->r_oid_len; + + /* ops */ + ceph_encode_16(&p, num_ops); src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; + req->r_request_ops = p; + for (i = 0; i < num_ops; i++, src_op++) { + osd_req_encode_op(req, p, src_op); + p += sizeof(struct ceph_osd_op); } - if (req->r_trail) - data_len += req->r_trail->length; - - if (snapc) { - head->snap_seq = cpu_to_le64(snapc->seq); - head->num_snaps = cpu_to_le32(snapc->num_snaps); + /* snaps */ + ceph_encode_64(&p, req->r_snapid); + ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); + ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); + if (req->r_snapc) { for (i = 0; i < snapc->num_snaps; i++) { - put_unaligned_le64(snapc->snaps[i], p); - p += sizeof(u64); + ceph_encode_64(&p, req->r_snapc->snaps[i]); } } + req->r_request_attempts = p; + p += 4; + + data_len = req->r_trail.length; if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); - } else if (data_len) { - req->r_request->hdr.data_off = 0; - req->r_request->hdr.data_len = cpu_to_le32(data_len); + data_len += len; } - + req->r_request->hdr.data_len = cpu_to_le32(data_len); req->r_request->page_alignment = req->r_page_alignment; BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; msg->hdr.front_len = cpu_to_le32(msg_size); + + dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, + num_ops); return; } EXPORT_SYMBOL(ceph_osdc_build_request); @@ -459,34 +443,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, + bool use_mempool, int page_align) { - struct ceph_osd_req_op ops[3]; + struct ceph_osd_req_op ops[2]; struct ceph_osd_request *req; + unsigned int num_op = 1; int r; + memset(&ops, 0, sizeof ops); + ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; if (do_sync) { ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; - - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); + num_op++; + } + + req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, + GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; /* calculate max write size */ - r = calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(vino, layout, off, plen, req, ops); if (r < 0) return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ @@ -496,10 +479,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, plen, ops, - snapc, - mtime, - req->r_oid, req->r_oid_len); + ceph_osdc_build_request(req, off, *plen, num_op, ops, + snapc, vino.snap, mtime); return req; } @@ -623,8 +604,8 @@ static void osd_reset(struct ceph_connection *con) down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); __kick_osd_requests(osdc, osd); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -739,31 +720,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - struct ceph_osd_request *req; - int ret = 0; + struct ceph_entity_addr *peer_addr; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); - ret = -ENODEV; - } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], - &osd->o_con.peer_addr, - sizeof(osd->o_con.peer_addr)) == 0 && - !ceph_con_opened(&osd->o_con)) { + + return -ENODEV; + } + + peer_addr = &osdc->osdmap->osd_addr[osd->o_osd]; + if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && + !ceph_con_opened(&osd->o_con)) { + struct ceph_osd_request *req; + dout(" osd addr hasn't changed and connection never opened," " letting msgr retry"); /* touch each r_stamp for handle_timeout()'s benfit */ list_for_each_entry(req, &osd->o_requests, r_osd_item) req->r_stamp = jiffies; - ret = -EAGAIN; - } else { - ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, - &osdc->osdmap->osd_addr[osd->o_osd]); - osd->o_incarnation++; + + return -EAGAIN; } - return ret; + + ceph_con_close(&osd->o_con); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); + osd->o_incarnation++; + + return 0; } static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) @@ -961,20 +946,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, + err = ceph_calc_object_layout(&pgid, req->r_oid, &req->r_file_layout, osdc->osdmap); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } - pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); @@ -991,8 +974,8 @@ static int __map_request(struct ceph_osd_client *osdc, (req->r_osd == NULL && o == -1)) return 0; /* no change */ - dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", - req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, + dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", + req->r_tid, pgid.pool, pgid.seed, o, req->r_osd ? req->r_osd->o_osd : -1); /* record full pg acting set */ @@ -1041,15 +1024,22 @@ out: static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { - struct ceph_osd_request_head *reqhead; - - dout("send_request %p tid %llu to osd%d flags %d\n", - req, req->r_tid, req->r_osd->o_osd, req->r_flags); + void *p; - reqhead = req->r_request->front.iov_base; - reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); - reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ - reqhead->reassert_version = req->r_reassert_version; + dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n", + req, req->r_tid, req->r_osd->o_osd, req->r_flags, + (unsigned long long)req->r_pgid.pool, req->r_pgid.seed); + + /* fill in message content that changes each time we send it */ + put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch); + put_unaligned_le32(req->r_flags, req->r_request_flags); + put_unaligned_le64(req->r_pgid.pool, req->r_request_pool); + p = req->r_request_pgid; + ceph_encode_64(&p, req->r_pgid.pool); + ceph_encode_32(&p, req->r_pgid.seed); + put_unaligned_le64(1, req->r_request_attempts); /* FIXME */ + memcpy(req->r_request_reassert_version, &req->r_reassert_version, + sizeof(req->r_reassert_version)); req->r_stamp = jiffies; list_move_tail(&req->r_req_lru_item, &osdc->req_lru); @@ -1062,16 +1052,13 @@ static void __send_request(struct ceph_osd_client *osdc, /* * Send any requests in the queue (req_unsent). */ -static void send_queued(struct ceph_osd_client *osdc) +static void __send_queued(struct ceph_osd_client *osdc) { struct ceph_osd_request *req, *tmp; - dout("send_queued\n"); - mutex_lock(&osdc->request_mutex); - list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) { + dout("__send_queued\n"); + list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) __send_request(osdc, req); - } - mutex_unlock(&osdc->request_mutex); } /* @@ -1123,8 +1110,8 @@ static void handle_timeout(struct work_struct *work) } __schedule_osd_timeout(osdc); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -1152,6 +1139,26 @@ static void complete_request(struct ceph_osd_request *req) complete_all(&req->r_safe_completion); /* fsync waiter */ } +static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid) +{ + __u8 v; + + ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad); + v = ceph_decode_8(p); + if (v > 1) { + pr_warning("do not understand pg encoding %d > 1", v); + return -EINVAL; + } + pgid->pool = ceph_decode_64(p); + pgid->seed = ceph_decode_32(p); + *p += 4; + return 0; + +bad: + pr_warning("incomplete pg encoding"); + return -EINVAL; +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1159,22 +1166,42 @@ static void complete_request(struct ceph_osd_request *req) static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_connection *con) { - struct ceph_osd_reply_head *rhead = msg->front.iov_base; + void *p, *end; struct ceph_osd_request *req; u64 tid; - int numops, object_len, flags; + int object_len; + int numops, payload_len, flags; s32 result; + s32 retry_attempt; + struct ceph_pg pg; + int err; + u32 reassert_epoch; + u64 reassert_version; + u32 osdmap_epoch; + int i; tid = le64_to_cpu(msg->hdr.tid); - if (msg->front.iov_len < sizeof(*rhead)) - goto bad; - numops = le32_to_cpu(rhead->num_ops); - object_len = le32_to_cpu(rhead->object_len); - result = le32_to_cpu(rhead->result); - if (msg->front.iov_len != sizeof(*rhead) + object_len + - numops * sizeof(struct ceph_osd_op)) + dout("handle_reply %p tid %llu\n", msg, tid); + + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + ceph_decode_need(&p, end, 4, bad); + object_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, object_len, bad); + p += object_len; + + err = __decode_pgid(&p, end, &pg); + if (err) goto bad; - dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); + + ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad); + flags = ceph_decode_64(&p); + result = ceph_decode_32(&p); + reassert_epoch = ceph_decode_32(&p); + reassert_version = ceph_decode_64(&p); + osdmap_epoch = ceph_decode_32(&p); + /* lookup */ mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); @@ -1184,7 +1211,38 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, return; } ceph_osdc_get_request(req); - flags = le32_to_cpu(rhead->flags); + + dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, + req, result); + + ceph_decode_need(&p, end, 4, bad); + numops = ceph_decode_32(&p); + if (numops > CEPH_OSD_MAX_OP) + goto bad_put; + if (numops != req->r_num_ops) + goto bad_put; + payload_len = 0; + ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); + for (i = 0; i < numops; i++) { + struct ceph_osd_op *op = p; + int len; + + len = le32_to_cpu(op->payload_len); + req->r_reply_op_len[i] = len; + dout(" op %d has %d bytes\n", i, len); + payload_len += len; + p += sizeof(*op); + } + if (payload_len != le32_to_cpu(msg->hdr.data_len)) { + pr_warning("sum of op payload lens %d != data_len %d", + payload_len, le32_to_cpu(msg->hdr.data_len)); + goto bad_put; + } + + ceph_decode_need(&p, end, 4 + numops * 4, bad); + retry_attempt = ceph_decode_32(&p); + for (i = 0; i < numops; i++) + req->r_reply_op_result[i] = ceph_decode_32(&p); /* * if this connection filled our message, drop our reference now, to @@ -1199,7 +1257,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, if (!req->r_got_reply) { unsigned int bytes; - req->r_result = le32_to_cpu(rhead->result); + req->r_result = result; bytes = le32_to_cpu(msg->hdr.data_len); dout("handle_reply result %d bytes %d\n", req->r_result, bytes); @@ -1207,7 +1265,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req->r_result = bytes; /* in case this is a write and we need to replay, */ - req->r_reassert_version = rhead->reassert_version; + req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch); + req->r_reassert_version.version = cpu_to_le64(reassert_version); req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { @@ -1242,10 +1301,11 @@ done: ceph_osdc_put_request(req); return; +bad_put: + ceph_osdc_put_request(req); bad: - pr_err("corrupt osd_op_reply got %d %d expected %d\n", - (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len), - (int)sizeof(*rhead)); + pr_err("corrupt osd_op_reply got %d %d\n", + (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); ceph_msg_dump(msg); } @@ -1462,7 +1522,9 @@ done: if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) ceph_monc_request_next_osdmap(&osdc->client->monc); - send_queued(osdc); + mutex_lock(&osdc->request_mutex); + __send_queued(osdc); + mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); return; @@ -1556,8 +1618,7 @@ static void __remove_event(struct ceph_osd_event *event) int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent) + void *data, struct ceph_osd_event **pevent) { struct ceph_osd_event *event; @@ -1567,14 +1628,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, dout("create_event %p\n", event); event->cb = event_cb; - event->one_shot = one_shot; + event->one_shot = 0; event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ - init_completion(&event->completion); spin_lock(&osdc->event_lock); event->cookie = ++osdc->event_count; @@ -1610,7 +1670,6 @@ static void do_event_work(struct work_struct *work) dout("do_event_work completing %p\n", event); event->cb(ver, notify_id, opcode, event->data); - complete(&event->completion); dout("do_event_work completed %p\n", event); ceph_osdc_put_event(event); kfree(event_work); @@ -1620,7 +1679,8 @@ static void do_event_work(struct work_struct *work) /* * Process osd watch notifications */ -void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +static void handle_watch_notify(struct ceph_osd_client *osdc, + struct ceph_msg *msg) { void *p, *end; u8 proto_ver; @@ -1641,9 +1701,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) spin_lock(&osdc->event_lock); event = __find_event(osdc, cookie); if (event) { + BUG_ON(event->one_shot); get_event(event); - if (event->one_shot) - __remove_event(event); } spin_unlock(&osdc->event_lock); dout("handle_watch_notify cookie %lld ver %lld event %p\n", @@ -1668,7 +1727,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) return; done_err: - complete(&event->completion); ceph_osdc_put_event(event); return; @@ -1677,21 +1735,6 @@ bad: return; } -int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) -{ - int err; - - dout("wait_event %p\n", event); - err = wait_for_completion_interruptible_timeout(&event->completion, - timeout * HZ); - ceph_osdc_put_event(event); - if (err > 0) - err = 0; - dout("wait_event %p returns %d\n", event, err); - return err; -} -EXPORT_SYMBOL(ceph_osdc_wait_event); - /* * Register request, send initial attempt. */ @@ -1706,7 +1749,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, #ifdef CONFIG_BLOCK req->r_request->bio = req->r_bio; #endif - req->r_request->trail = req->r_trail; + req->r_request->trail = &req->r_trail; register_request(osdc, req); @@ -1865,7 +1908,6 @@ out_mempool: out: return err; } -EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { @@ -1882,7 +1924,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } -EXPORT_SYMBOL(ceph_osdc_stop); /* * Read some contiguous pages. If we cross a stripe boundary, shorten @@ -1902,7 +1943,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1, page_align); + false, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1931,8 +1972,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int num_pages, - int flags, int do_sync, bool nofail) + struct page **pages, int num_pages) { struct ceph_osd_request *req; int rc = 0; @@ -1941,11 +1981,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, CEPH_OSD_OP_WRITE, - flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE, - snapc, do_sync, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + snapc, 0, truncate_seq, truncate_size, mtime, - nofail, 1, page_align); + true, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1954,7 +1993,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages %llu~%llu (%d pages)\n", off, len, req->r_num_pages); - rc = ceph_osdc_start_request(osdc, req, nofail); + rc = ceph_osdc_start_request(osdc, req, true); if (!rc) rc = ceph_osdc_wait_request(osdc, req); @@ -2047,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (data_len > 0) { int want = calc_pages_for(req->r_page_alignment, data_len); - if (unlikely(req->r_num_pages < want)) { + if (req->r_pages && unlikely(req->r_num_pages < want)) { pr_warning("tid %lld reply has %d bytes %d pages, we" " had only %d pages ready\n", tid, data_len, want, req->r_num_pages); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214b5d26..69bc4bf89e3e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -13,26 +13,18 @@ char *ceph_osdmap_state_str(char *str, int len, int state) { - int flag = 0; - if (!len) - goto done; - - *str = '\0'; - if (state) { - if (state & CEPH_OSD_EXISTS) { - snprintf(str, len, "exists"); - flag = 1; - } - if (state & CEPH_OSD_UP) { - snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), - "up"); - flag = 1; - } - } else { + return str; + + if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) + snprintf(str, len, "exists, up"); + else if (state & CEPH_OSD_EXISTS) + snprintf(str, len, "exists"); + else if (state & CEPH_OSD_UP) + snprintf(str, len, "up"); + else snprintf(str, len, "doesn't exist"); - } -done: + return str; } @@ -53,13 +45,8 @@ static int calc_bits_of(unsigned int t) */ static void calc_pg_masks(struct ceph_pg_pool_info *pi) { - pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; - pi->pgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; - pi->lpg_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; - pi->lpgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; + pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; + pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; } /* @@ -170,6 +157,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) c->choose_local_tries = 2; c->choose_local_fallback_tries = 5; c->choose_total_tries = 19; + c->chooseleaf_descend_once = 0; ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); @@ -336,6 +324,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable choose_total_tries = %d", c->choose_total_tries); + ceph_decode_need(p, end, sizeof(u32), done); + c->chooseleaf_descend_once = ceph_decode_32(p); + dout("crush decode tunable chooseleaf_descend_once = %d", + c->chooseleaf_descend_once); + done: dout("crush_decode success\n"); return c; @@ -354,12 +347,13 @@ bad: */ static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { - u64 a = *(u64 *)&l; - u64 b = *(u64 *)&r; - - if (a < b) + if (l.pool < r.pool) + return -1; + if (l.pool > r.pool) + return 1; + if (l.seed < r.seed) return -1; - if (a > b) + if (l.seed > r.seed) return 1; return 0; } @@ -405,8 +399,8 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, } else if (c > 0) { n = n->rb_right; } else { - dout("__lookup_pg_mapping %llx got %p\n", - *(u64 *)&pgid, pg); + dout("__lookup_pg_mapping %lld.%x got %p\n", + pgid.pool, pgid.seed, pg); return pg; } } @@ -418,12 +412,13 @@ static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); if (pg) { - dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed, + pg); rb_erase(&pg->node, root); kfree(pg); return 0; } - dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed); return -ENOENT; } @@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) return 0; } -static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) +static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) { struct ceph_pg_pool_info *pi; struct rb_node *n = root->rb_node; @@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) { - unsigned int n, m; + u8 ev, cv; + unsigned len, num; + void *pool_end; + + ceph_decode_need(p, end, 2 + 4, bad); + ev = ceph_decode_8(p); /* encoding version */ + cv = ceph_decode_8(p); /* compat version */ + if (ev < 5) { + pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + if (cv > 7) { + pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + len = ceph_decode_32(p); + ceph_decode_need(p, end, len, bad); + pool_end = *p + len; - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + pi->type = ceph_decode_8(p); + pi->size = ceph_decode_8(p); + pi->crush_ruleset = ceph_decode_8(p); + pi->object_hash = ceph_decode_8(p); + + pi->pg_num = ceph_decode_32(p); + pi->pgp_num = ceph_decode_32(p); + + *p += 4 + 4; /* skip lpg* */ + *p += 4; /* skip last_change */ + *p += 8 + 4; /* skip snap_seq, snap_epoch */ - /* num_snaps * snap_info_t */ - n = le32_to_cpu(pi->v.num_snaps); - while (n--) { - ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + - sizeof(struct ceph_timespec), bad); - *p += sizeof(u64) + /* key */ - 1 + sizeof(u64) + /* u8, snapid */ - sizeof(struct ceph_timespec); - m = ceph_decode_32(p); /* snap name */ - *p += m; + /* skip snaps */ + num = ceph_decode_32(p); + while (num--) { + *p += 8; /* snapid key */ + *p += 1 + 1; /* versions */ + len = ceph_decode_32(p); + *p += len; } - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; + /* skip removed snaps */ + num = ceph_decode_32(p); + *p += num * (8 + 8); + + *p += 8; /* skip auid */ + pi->flags = ceph_decode_64(p); + + /* ignore the rest */ + + *p = pool_end; + calc_pg_masks(pi); return 0; bad: @@ -535,14 +563,15 @@ bad: static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) { struct ceph_pg_pool_info *pi; - u32 num, len, pool; + u32 num, len; + u64 pool; ceph_decode_32_safe(p, end, num, bad); dout(" %d pool names\n", num); while (num--) { - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); - dout(" pool %d len %d\n", pool, len); + dout(" pool %llu len %d\n", pool, len); ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { @@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_osdmap *map; u16 version; u32 len, max, i; - u8 ev; int err = -EINVAL; void *start = *p; struct ceph_pg_pool_info *pi; @@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) map->pg_temp = RB_ROOT; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_VERSION) { - pr_warning("got unknown v %d > %d of osdmap\n", version, - CEPH_OSDMAP_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > 6 of osdmap\n", version); + goto bad; + } + if (version < 6) { + pr_warning("got old v %d < 6 of osdmap\n", version); goto bad; } @@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { - ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + ceph_decode_need(p, end, 8 + 2, bad); err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; - pi->id = ceph_decode_32(p); - err = -EINVAL; - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - kfree(pi); - goto bad; - } + pi->id = ceph_decode_64(p); err = __decode_pool(p, end, pi); if (err < 0) { kfree(pi); @@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5) { - err = __decode_pool_names(p, end, map); - if (err < 0) { - dout("fail to decode pool names"); - goto bad; - } + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -724,10 +745,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) for (i = 0; i < len; i++) { int n, j; struct ceph_pg pgid; + struct ceph_pg_v1 pgid_v1; struct ceph_pg_mapping *pg; ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); + pgid.pool = le32_to_cpu(pgid_v1.pool); + pgid.seed = le16_to_cpu(pgid_v1.ps); n = ceph_decode_32(p); err = -EINVAL; if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) @@ -745,7 +769,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) err = __insert_pg_mapping(pg, &map->pg_temp); if (err) goto bad; - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, + len); } /* crush */ @@ -784,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; - u32 len, pool; - __s32 new_pool_max, new_flags, max; + s32 len; + u64 pool; + __s64 new_pool_max; + __s32 new_flags, max; void *start = *p; int err = -EINVAL; u16 version; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_INC_VERSION) { - pr_warning("got unknown v %d > %d of inc osdmap\n", version, - CEPH_OSDMAP_INC_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); goto bad; } @@ -803,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, epoch = ceph_decode_32(p); BUG_ON(epoch != map->epoch+1); ceph_decode_copy(p, &modified, sizeof(modified)); - new_pool_max = ceph_decode_32(p); + new_pool_max = ceph_decode_64(p); new_flags = ceph_decode_32(p); /* full map? */ @@ -853,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new_pool */ ceph_decode_32_safe(p, end, len, bad); while (len--) { - __u8 ev; struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); - ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - err = -EINVAL; - goto bad; - } + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (!pi) { pi = kzalloc(sizeof(*pi), GFP_NOFS); @@ -890,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) __remove_pg_pool(&map->pg_pools, pi); @@ -946,10 +963,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_mapping *pg; int j; + struct ceph_pg_v1 pgid_v1; struct ceph_pg pgid; u32 pglen; ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); + pgid.pool = le32_to_cpu(pgid_v1.pool); + pgid.seed = le16_to_cpu(pgid_v1.ps); pglen = ceph_decode_32(p); if (pglen) { @@ -975,8 +995,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, kfree(pg); goto bad; } - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, - pglen); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, + pgid.seed, pglen); } else { /* remove */ __remove_pg_mapping(&map->pg_temp, pgid); @@ -1010,7 +1030,7 @@ bad: * pass a stride back to the caller. */ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *ono, u64 *oxoff, u64 *oxlen) { @@ -1021,7 +1041,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u32 su_per_object; u64 t, su_offset; - dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, + dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, osize, su); if (su == 0 || sc == 0) goto invalid; @@ -1054,11 +1074,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, /* * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (plen) or + * object. This is the minimum of the full length requested (len) or * the remainder of the current stripe being written to. */ - *oxlen = min_t(u64, *plen, su - su_offset); - *plen = *oxlen; + *oxlen = min_t(u64, len, su - su_offset); dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); return 0; @@ -1076,33 +1095,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping); * calculate an object layout (i.e. pgid) from an oid, * file_layout, and osdmap */ -int ceph_calc_object_layout(struct ceph_object_layout *ol, +int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap) { unsigned int num, num_mask; - struct ceph_pg pgid; - int poolid = le32_to_cpu(fl->fl_pg_pool); struct ceph_pg_pool_info *pool; - unsigned int ps; BUG_ON(!osdmap); - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pg->pool = le32_to_cpu(fl->fl_pg_pool); + pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); if (!pool) return -EIO; - ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); - num = le32_to_cpu(pool->v.pg_num); + pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); + num = pool->pg_num; num_mask = pool->pg_num_mask; - pgid.ps = cpu_to_le16(ps); - pgid.preferred = cpu_to_le16(-1); - pgid.pool = fl->fl_pg_pool; - dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); - - ol->ol_pgid = pgid; - ol->ol_stripe_unit = fl->fl_object_stripe_unit; + dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); return 0; } EXPORT_SYMBOL(ceph_calc_object_layout); @@ -1117,19 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned int poolid, ps, pps, t, r; - - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); + int r; + u32 pps; - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return NULL; /* pg_temp? */ - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), - pool->pgp_num_mask); - pgid.ps = cpu_to_le16(t); + pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, + pool->pgp_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1137,26 +1144,39 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, + pool->type, pool->size); if (ruleno < 0) { - pr_err("no crush rule pool %d ruleset %d type %d size %d\n", - poolid, pool->v.crush_ruleset, pool->v.type, - pool->v.size); + pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", + pgid.pool, pool->crush_ruleset, pool->type, + pool->size); return NULL; } - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.pgp_num), - pool->pgp_num_mask); - pps += poolid; + if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { + /* hash pool id and seed sothat pool PGs do not overlap */ + pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask), + pgid.pool); + } else { + /* + * legacy ehavior: add ps and pool together. this is + * not a great approach because the PGs from each pool + * will overlap on top of each other: 0.5 == 1.4 == + * 2.3 == ... + */ + pps = ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask) + + (unsigned)pgid.pool; + } r = crush_do_rule(osdmap->crush, ruleno, pps, osds, - min_t(int, pool->v.size, *num), + min_t(int, pool->size, *num), osdmap->osd_weight); if (r < 0) { - pr_err("error %d from crush rule: pool %d ruleset %d type %d" - " size %d\n", r, poolid, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + pr_err("error %d from crush rule: pool %lld ruleset %d type %d" + " size %d\n", r, pgid.pool, pool->crush_ruleset, + pool->type, pool->size); return NULL; } *num = r; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21df87d1..815a2249cfa9 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -12,7 +12,7 @@ /* * build a vector of user pages */ -struct page **ceph_get_direct_page_vector(const char __user *data, +struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page) { struct page **pages; @@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); * copy user data into a page vector */ int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len) { int i = 0; @@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_user_to_page_vector); -int ceph_copy_to_page_vector(struct page **pages, - const char *data, +void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; @@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_to_page_vector); -int ceph_copy_from_page_vector(struct page **pages, - char *data, +void ceph_copy_from_page_vector(struct page **pages, + void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; @@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_from_page_vector); @@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); * copy user data from a page vector into a user pointer */ int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, + void __user *data, loff_t off, size_t len) { int i = 0; diff --git a/net/core/dev.c b/net/core/dev.c index 18d8b5acc343..a06a7a58dd11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -658,11 +658,10 @@ __setup("netdev=", netdev_boot_setup); struct net_device *__dev_get_by_name(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry(dev, p, head, name_hlist) + hlist_for_each_entry(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -684,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each_entry_rcu(dev, p, head, name_hlist) + hlist_for_each_entry_rcu(dev, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; @@ -735,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name); struct net_device *__dev_get_by_index(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry(dev, p, head, index_hlist) + hlist_for_each_entry(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; @@ -760,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) { - struct hlist_node *p; struct net_device *dev; struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each_entry_rcu(dev, p, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist) if (dev->ifindex == ifindex) return dev; diff --git a/net/core/flow.c b/net/core/flow.c index 43f7495df27a..c56ea6f7f6c7 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -132,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc, int shrink_to) { struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && flow_entry_valid(fle)) { @@ -211,7 +211,6 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache *fc = &flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; - struct hlist_node *entry; struct flow_cache_object *flo; size_t keysize; unsigned int hash; @@ -235,7 +234,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_new_hash_rnd(fc, fcp); hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { + hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && @@ -301,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (flow_entry_valid(fle)) continue; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 0f6bb6f8d391..3174f1998ee6 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -16,12 +16,11 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff { struct net *net = seq_file_net(seq); struct net_device *dev; - struct hlist_node *p; struct hlist_head *h; unsigned int count = 0, offset = get_offset(*pos); h = &net->dev_name_head[get_bucket(*pos)]; - hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + hlist_for_each_entry_rcu(dev, h, name_hlist) { if (++count == offset) return dev; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d8aa20f6a46e..b376410ff259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1060,7 +1060,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, s_idx; struct net_device *dev; struct hlist_head *head; - struct hlist_node *node; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; @@ -1080,7 +1079,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c4a2def5b7bd..c21f200eed93 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -175,12 +175,11 @@ static struct hlist_head *dn_find_list(struct sock *sk) static int check_port(__le16 port) { struct sock *sk; - struct hlist_node *node; if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -374,11 +373,10 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) { struct hlist_head *list = listen_hash(addr); - struct hlist_node *node; struct sock *sk; read_lock(&dn_hash_lock); - sk_for_each(sk, node, list) { + sk_for_each(sk, list) { struct dn_scp *scp = DN_SK(sk); if (sk->sk_state != TCP_LISTEN) continue; @@ -414,11 +412,10 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk; - struct hlist_node *node; struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f968c1b58f47..6c2445bcaba1 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -483,7 +483,6 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; - struct hlist_node *node; int dumped = 0; if (!net_eq(net, &init_net)) @@ -498,7 +497,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { e = 0; - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) { if (e < s_e) goto next; if (dumped) @@ -828,7 +827,6 @@ out: struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; - struct hlist_node *node; unsigned int h; if (n < RT_TABLE_MIN) @@ -839,7 +837,7 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) h = n & (DN_FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) { if (t->n == n) { rcu_read_unlock(); return t; @@ -885,11 +883,10 @@ void dn_fib_flush(void) { int flushed = 0; struct dn_fib_table *tb; - struct hlist_node *node; unsigned int h; for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) flushed += tb->flush(tb); } @@ -908,12 +905,12 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { struct dn_fib_table *t; - struct hlist_node *node, *next; + struct hlist_node *next; unsigned int h; write_lock(&dn_fib_tables_lock); for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], + hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h], hlist) { hlist_del(&t->hlist); kfree(t); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 16705611589a..e0da175f8e5b 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -350,7 +350,6 @@ static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id, int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk, *prev = NULL; - struct hlist_node *node; int ret = NET_RX_SUCCESS; u16 pan_id, short_addr; @@ -361,7 +360,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev); read_lock(&dgram_lock); - sk_for_each(sk, node, &dgram_head) { + sk_for_each(sk, &dgram_head) { if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr, dgram_sk(sk))) { if (prev) { diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 50e823927d49..41f538b8e59c 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -221,10 +221,9 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) { struct sock *sk; - struct hlist_node *node; read_lock(&raw_lock); - sk_for_each(sk, node, &raw_head) { + sk_for_each(sk, &raw_head) { bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5281314886c1..f678507bc829 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -139,10 +139,9 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; - struct hlist_node *node; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { if (ifa->ifa_local == addr) { struct net_device *dev = ifa->ifa_dev->dev; @@ -588,7 +587,6 @@ static void check_lifetime(struct work_struct *work) { unsigned long now, next, next_sec, next_sched; struct in_ifaddr *ifa; - struct hlist_node *node; int i; now = jiffies; @@ -596,8 +594,7 @@ static void check_lifetime(struct work_struct *work) rcu_read_lock(); for (i = 0; i < IN4_ADDR_HSIZE; i++) { - hlist_for_each_entry_rcu(ifa, node, - &inet_addr_lst[i], hash) { + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { unsigned long age; if (ifa->ifa_flags & IFA_F_PERMANENT) @@ -1493,7 +1490,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct in_device *in_dev; struct in_ifaddr *ifa; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -1503,7 +1499,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 99f00d39d10b..eb4bb12b3eb4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -112,7 +112,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; @@ -122,7 +121,7 @@ struct fib_table *fib_get_table(struct net *net, u32 id) rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -137,13 +136,12 @@ static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) + hlist_for_each_entry(tb, head, tb_hlist) flushed += fib_table_flush(tb); } @@ -656,7 +654,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; - struct hlist_node *node; struct hlist_head *head; int dumped = 0; @@ -670,7 +667,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, node, head, tb_hlist) { + hlist_for_each_entry(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -1117,11 +1114,11 @@ static void ip_fib_net_exit(struct net *net) for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; head = &net->ipv4.fib_table_hash[i]; - hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { - hlist_del(node); + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { + hlist_del(&tb->tb_hlist); fib_table_flush(tb); fib_free_table(tb); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4797a800faf8..8f6cb7a87cd6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -298,14 +298,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) static struct fib_info *fib_find_info(const struct fib_info *nfi) { struct hlist_head *head; - struct hlist_node *node; struct fib_info *fi; unsigned int hash; hash = fib_info_hashfn(nfi); head = &fib_info_hash[hash]; - hlist_for_each_entry(fi, node, head, fib_hash) { + hlist_for_each_entry(fi, head, fib_hash) { if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) @@ -331,7 +330,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; unsigned int hash; @@ -339,7 +337,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags & RTNH_F_DEAD)) { @@ -721,10 +719,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { + hlist_for_each_entry_safe(fi, n, head, fib_hash) { struct hlist_head *dest; unsigned int new_hash; @@ -739,10 +737,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, for (i = 0; i < old_size; i++) { struct hlist_head *lhead = &fib_info_laddrhash[i]; - struct hlist_node *node, *n; + struct hlist_node *n; struct fib_info *fi; - hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { + hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { struct hlist_head *ldest; unsigned int new_hash; @@ -1096,13 +1094,12 @@ int fib_sync_down_addr(struct net *net, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; - struct hlist_node *node; struct fib_info *fi; if (fib_info_laddrhash == NULL || local == 0) return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { + hlist_for_each_entry(fi, head, fib_lhash) { if (!net_eq(fi->fib_net, net)) continue; if (fi->fib_prefsrc == local) { @@ -1120,13 +1117,12 @@ int fib_sync_down_dev(struct net_device *dev, int force) struct fib_info *prev_fi = NULL; unsigned int hash = fib_devindex_hashfn(dev->ifindex); struct hlist_head *head = &fib_info_devhash[hash]; - struct hlist_node *node; struct fib_nh *nh; if (force) scope = -1; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int dead; @@ -1232,7 +1228,6 @@ int fib_sync_up(struct net_device *dev) struct fib_info *prev_fi; unsigned int hash; struct hlist_head *head; - struct hlist_node *node; struct fib_nh *nh; int ret; @@ -1244,7 +1239,7 @@ int fib_sync_up(struct net_device *dev) head = &fib_info_devhash[hash]; ret = 0; - hlist_for_each_entry(nh, node, head, nh_hash) { + hlist_for_each_entry(nh, head, nh_hash) { struct fib_info *fi = nh->nh_parent; int alive; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 61e03da3e1f5..ff06b7543d9f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -920,10 +920,9 @@ nomem: static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { struct hlist_head *head = &l->list; - struct hlist_node *node; struct leaf_info *li; - hlist_for_each_entry_rcu(li, node, head, hlist) + hlist_for_each_entry_rcu(li, head, hlist) if (li->plen == plen) return li; @@ -943,12 +942,11 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen) static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) { struct leaf_info *li = NULL, *last = NULL; - struct hlist_node *node; if (hlist_empty(head)) { hlist_add_head_rcu(&new->hlist, head); } else { - hlist_for_each_entry(li, node, head, hlist) { + hlist_for_each_entry(li, head, hlist) { if (new->plen > li->plen) break; @@ -1354,9 +1352,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, { struct leaf_info *li; struct hlist_head *hhead = &l->list; - struct hlist_node *node; - hlist_for_each_entry_rcu(li, node, hhead, hlist) { + hlist_for_each_entry_rcu(li, hhead, hlist) { struct fib_alias *fa; if (l->key != (key & li->mask_plen)) @@ -1740,10 +1737,10 @@ static int trie_flush_leaf(struct leaf *l) { int found = 0; struct hlist_head *lih = &l->list; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct leaf_info *li = NULL; - hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { + hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); if (list_empty(&li->falh)) { @@ -1895,14 +1892,13 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) { struct leaf_info *li; - struct hlist_node *node; int i, s_i; s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { if (i < s_i) { i++; continue; @@ -2092,14 +2088,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) if (IS_LEAF(n)) { struct leaf *l = (struct leaf *)n; struct leaf_info *li; - struct hlist_node *tmp; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; - hlist_for_each_entry_rcu(li, tmp, &l->list, hlist) + hlist_for_each_entry_rcu(li, &l->list, hlist) ++s->prefixes; } else { const struct tnode *tn = (const struct tnode *) n; @@ -2200,10 +2195,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct trie *t = (struct trie *) tb->tb_data; struct trie_stat stat; @@ -2245,10 +2239,9 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - struct hlist_node *node; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct rt_trie_node *n; for (n = fib_trie_get_first(iter, @@ -2298,7 +2291,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* new hash chain */ while (++h < FIB_TABLE_HASHSZ) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist) { n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) goto found; @@ -2381,13 +2374,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) } else { struct leaf *l = (struct leaf *) n; struct leaf_info *li; - struct hlist_node *node; __be32 val = htonl(l->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %pI4\n", &val); - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2532,7 +2524,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) { struct leaf *l = v; struct leaf_info *li; - struct hlist_node *node; if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " @@ -2541,7 +2532,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; } - hlist_for_each_entry_rcu(li, node, &l->list, hlist) { + hlist_for_each_entry_rcu(li, &l->list, hlist) { struct fib_alias *fa; __be32 mask, prefix; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 11cb4979a465..7d1874be1df3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -57,7 +57,6 @@ int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { struct sock *sk2; - struct hlist_node *node; int reuse = sk->sk_reuse; int reuseport = sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); @@ -69,7 +68,7 @@ int inet_csk_bind_conflict(const struct sock *sk, * one this bucket belongs to. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || @@ -95,7 +94,7 @@ int inet_csk_bind_conflict(const struct sock *sk, } } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); @@ -106,7 +105,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_bind_hashbucket *head; - struct hlist_node *node; struct inet_bind_bucket *tb; int ret, attempts = 5; struct net *net = sock_net(sk); @@ -129,7 +127,7 @@ again: head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { if (((tb->fastreuse > 0 && sk->sk_reuse && @@ -183,7 +181,7 @@ have_snum: head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 2e453bde6992..245ae078a07f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -33,9 +33,9 @@ static void inet_frag_secret_rebuild(unsigned long dummy) get_random_bytes(&f->rnd, sizeof(u32)); for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_queue *q; - struct hlist_node *p, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { + hlist_for_each_entry_safe(q, n, &f->hash[i], list) { unsigned int hval = f->hashfn(q); if (hval != i) { @@ -203,7 +203,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, { struct inet_frag_queue *qp; #ifdef CONFIG_SMP - struct hlist_node *n; #endif unsigned int hash; @@ -219,7 +218,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, * such entry could be created on other cpu, while we * promoted read lock to write lock. */ - hlist_for_each_entry(qp, n, &f->hash[hash], list) { + hlist_for_each_entry(qp, &f->hash[hash], list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); write_unlock(&f->lock); @@ -278,9 +277,8 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, __releases(&f->lock) { struct inet_frag_queue *q; - struct hlist_node *n; - hlist_for_each_entry(q, n, &f->hash[hash], list) { + hlist_for_each_entry(q, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); read_unlock(&f->lock); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 0ce0595d9861..6af375afeeef 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -120,13 +120,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) * that the listener socket's icsk_bind_hash is the same * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ - struct hlist_node *node; - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), sock_net(sk)) && tb->port == port) break; } - if (!node) { + if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, sock_net(sk), head, port); if (!tb) { @@ -493,7 +492,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int i, remaining, low, high, port; static u32 hint; u32 offset = hint + port_offset; - struct hlist_node *node; struct inet_timewait_sock *tw = NULL; inet_get_local_port_range(&low, &high); @@ -512,7 +510,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * because the established check is already * unique enough. */ - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { if (tb->fastreuse >= 0 || diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 2784db3155fb..1f27c9f4afd0 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -216,7 +216,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, const int slot) { struct inet_timewait_sock *tw; - struct hlist_node *node; unsigned int killed; int ret; @@ -229,7 +228,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, killed = 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { + inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); @@ -438,10 +437,10 @@ void inet_twdr_twcal_tick(unsigned long data) for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; + struct hlist_node *safe; struct inet_timewait_sock *tw; - inet_twsk_for_each_inmate_safe(tw, node, safe, + inet_twsk_for_each_inmate_safe(tw, safe, &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 53ddebc292b6..dd44e0ab600c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -111,9 +111,7 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk); static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { - struct hlist_node *node; - - sk_for_each_from(sk, node) { + sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); if (net_eq(sock_net(sk), net) && inet->inet_num == num && @@ -914,9 +912,7 @@ static struct sock *raw_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; - - sk_for_each(sk, node, &state->h->ht[state->bucket]) + sk_for_each(sk, &state->h->ht[state->bucket]) if (sock_net(sk) == seq_file_net(seq)) goto found; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 145d3bf8df86..4a8ec457310f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -954,7 +954,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); struct tcp_md5sig_info *md5sig; @@ -968,7 +967,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { + hlist_for_each_entry_rcu(key, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -1069,14 +1068,14 @@ static void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct hlist_node *pos, *n; + struct hlist_node *n; struct tcp_md5sig_info *md5sig; md5sig = rcu_dereference_protected(tp->md5sig_info, 1); if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { + hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4dc0d44a5d31..f2c7e615f902 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1419,11 +1419,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; - struct hlist_node *node; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -1445,9 +1444,8 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, { unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; - struct hlist_node *node; - hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1487,10 +1485,9 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add { struct inet6_ifaddr *ifp, *result = NULL; unsigned int hash = inet6_addr_hash(addr); - struct hlist_node *node; rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -2907,11 +2904,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { struct hlist_head *h = &inet6_addr_lst[i]; - struct hlist_node *n; spin_lock_bh(&addrconf_hash_lock); restart: - hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); addrconf_del_timer(ifa); @@ -3218,8 +3214,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - struct hlist_node *n; - hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3244,9 +3239,8 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, { struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - struct hlist_node *n = &ifa->addr_lst; - hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { + hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; state->offset++; @@ -3255,7 +3249,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; - hlist_for_each_entry_rcu_bh(ifa, n, + hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -3357,11 +3351,10 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { int ret = 0; struct inet6_ifaddr *ifp = NULL; - struct hlist_node *n; unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -3383,7 +3376,6 @@ static void addrconf_verify(unsigned long foo) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; - struct hlist_node *node; int i; rcu_read_lock_bh(); @@ -3395,7 +3387,7 @@ static void addrconf_verify(unsigned long foo) for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, node, + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; @@ -3866,7 +3858,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -3876,7 +3867,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; if (h > s_h || idx > s_idx) @@ -4222,7 +4213,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; - struct hlist_node *node; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -4231,7 +4221,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; idev = __in6_dev_get(dev); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ff76eecfd622..aad64352cb60 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -173,9 +173,8 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex) { - struct hlist_node *pos; struct ip6addrlbl_entry *p; - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (__ip6addrlbl_match(net, p, addr, type, ifindex)) return p; } @@ -261,9 +260,9 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (hlist_empty(&ip6addrlbl_table.head)) { hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); } else { - struct hlist_node *pos, *n; + struct hlist_node *n; struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, pos, n, + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && @@ -319,13 +318,13 @@ static int __ip6addrlbl_del(struct net *net, int ifindex) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; int ret = -ESRCH; ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && @@ -380,11 +379,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net) static void __net_exit ip6addrlbl_net_exit(struct net *net) { struct ip6addrlbl_entry *p = NULL; - struct hlist_node *pos, *n; + struct hlist_node *n; /* Remove all labels belonging to the exiting net */ spin_lock(&ip6addrlbl_table.lock); - hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { if (net_eq(ip6addrlbl_net(p), net)) { hlist_del_rcu(&p->list); ip6addrlbl_put(p); @@ -505,12 +504,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; - struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b386a2ce4c6f..9bfab19ff3c0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -31,7 +31,6 @@ int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { const struct sock *sk2; - const struct hlist_node *node; int reuse = sk->sk_reuse; int reuseport = sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); @@ -41,7 +40,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, * See comment in inet_csk_bind_conflict about sock lookup * vs net namespaces issues. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || @@ -58,7 +57,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 710cafd2e1a9..192dd1a0e188 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -224,7 +224,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; struct hlist_head *head; - struct hlist_node *node; unsigned int h; if (id == 0) @@ -232,7 +231,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -363,7 +362,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; struct fib6_table *tb; - struct hlist_node *node; struct hlist_head *head; int res = 0; @@ -398,7 +396,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -1520,14 +1518,13 @@ void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { read_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); @@ -1540,14 +1537,13 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { struct fib6_table *table; - struct hlist_node *node; struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { + hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, func, prune, arg); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c65907db8c44..330b5e7b7df6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -71,10 +71,9 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; bool is_multicast = ipv6_addr_is_multicast(loc_addr); - sk_for_each_from(sk, node) + sk_for_each_from(sk) if (inet_sk(sk)->inet_num == num) { struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 6cc48012b730..de2bcfaaf759 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -89,9 +89,8 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos; - hlist_for_each_entry_rcu(x6spi, pos, + hlist_for_each_entry_rcu(x6spi, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (xfrm6_addr_equal(&x6spi->addr, saddr)) @@ -120,9 +119,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); - struct hlist_node *pos; - hlist_for_each_entry(x6spi, pos, + hlist_for_each_entry(x6spi, &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) @@ -203,11 +201,11 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos, *n; + struct hlist_node *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); - hlist_for_each_entry_safe(x6spi, pos, n, + hlist_for_each_entry_safe(x6spi, n, &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index dfd6faaf0ea7..f547a47d381c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -228,9 +228,8 @@ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &intrfc->if_sklist) + sk_for_each(s, &intrfc->if_sklist) if (ipx_sk(s)->port == port) goto found; s = NULL; @@ -259,12 +258,11 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, __be16 port) { struct sock *s; - struct hlist_node *node; ipxitf_hold(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == port && @@ -282,14 +280,14 @@ found: static void __ipxitf_down(struct ipx_interface *intrfc) { struct sock *s; - struct hlist_node *node, *t; + struct hlist_node *t; /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ - sk_for_each_safe(s, node, t, &intrfc->if_sklist) { + sk_for_each_safe(s, t, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); s->sk_err = ENOLINK; @@ -385,12 +383,11 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc, int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN); struct sock *s; - struct hlist_node *node; int rc; spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(s, node, &intrfc->if_sklist) { + sk_for_each(s, &intrfc->if_sklist) { struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == ipx->ipx_dest.sock && @@ -446,12 +443,11 @@ static struct sock *ncp_connection_hack(struct ipx_interface *intrfc, connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8); if (connection) { - struct hlist_node *node; /* Now we have to look for a special NCP connection handling * socket. Only these sockets have ipx_ncp_conn != 0, set by * SIOCIPXNCPCONN. */ spin_lock_bh(&intrfc->if_sklist_lock); - sk_for_each(sk, node, &intrfc->if_sklist) + sk_for_each(sk, &intrfc->if_sklist) if (ipx_sk(sk)->ipx_ncp_conn == connection) { sock_hold(sk); goto found; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 02ff7f2f60d4..65e8833a2510 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -103,19 +103,18 @@ out: static __inline__ struct sock *ipx_get_socket_idx(loff_t pos) { struct sock *s = NULL; - struct hlist_node *node; struct ipx_interface *i; list_for_each_entry(i, &ipx_interfaces, node) { spin_lock_bh(&i->if_sklist_lock); - sk_for_each(s, node, &i->if_sklist) { + sk_for_each(s, &i->if_sklist) { if (!pos) break; --pos; } spin_unlock_bh(&i->if_sklist_lock); if (!pos) { - if (node) + if (s) goto found; break; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index cd6f7a991d80..a7d11ffe4284 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -156,14 +156,13 @@ static int afiucv_pm_freeze(struct device *dev) { struct iucv_sock *iucv; struct sock *sk; - struct hlist_node *node; int err = 0; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_freeze\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); switch (sk->sk_state) { case IUCV_DISCONN: @@ -194,13 +193,12 @@ static int afiucv_pm_freeze(struct device *dev) static int afiucv_pm_restore_thaw(struct device *dev) { struct sock *sk; - struct hlist_node *node; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); #endif read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { switch (sk->sk_state) { case IUCV_CONNECTED: sk->sk_err = EPIPE; @@ -390,9 +388,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, static struct sock *__iucv_get_sock_by_name(char *nm) { struct sock *sk; - struct hlist_node *node; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (!memcmp(&iucv_sk(sk)->src_name, nm, 8)) return sk; @@ -1678,7 +1675,6 @@ static int iucv_callback_connreq(struct iucv_path *path, unsigned char user_data[16]; unsigned char nuser_data[16]; unsigned char src_name[8]; - struct hlist_node *node; struct sock *sk, *nsk; struct iucv_sock *iucv, *niucv; int err; @@ -1689,7 +1685,7 @@ static int iucv_callback_connreq(struct iucv_path *path, read_lock(&iucv_sk_list.lock); iucv = NULL; sk = NULL; - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { /* @@ -2115,7 +2111,6 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; struct af_iucv_trans_hdr *trans_hdr; @@ -2132,7 +2127,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, iucv = NULL; sk = NULL; read_lock(&iucv_sk_list.lock); - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { if (trans_hdr->flags == AF_IUCV_FLAG_SYN) { if ((!memcmp(&iucv_sk(sk)->src_name, trans_hdr->destAppName, 8)) && @@ -2225,10 +2220,9 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, struct sk_buff *list_skb; struct sk_buff *nskb; unsigned long flags; - struct hlist_node *node; read_lock_irqsave(&iucv_sk_list.lock, flags); - sk_for_each(sk, node, &iucv_sk_list.head) + sk_for_each(sk, &iucv_sk_list.head) if (sk == isk) { iucv = iucv_sk(sk); break; @@ -2299,14 +2293,13 @@ static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = (struct net_device *)ptr; - struct hlist_node *node; struct sock *sk; struct iucv_sock *iucv; switch (event) { case NETDEV_REBOOT: case NETDEV_GOING_DOWN: - sk_for_each(sk, node, &iucv_sk_list.head) { + sk_for_each(sk, &iucv_sk_list.head) { iucv = iucv_sk(sk); if ((iucv->hs_dev == event_dev) && (sk->sk_state == IUCV_CONNECTED)) { diff --git a/net/key/af_key.c b/net/key/af_key.c index 9ef79851f297..556fdafdd1ea 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -225,7 +225,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct hlist_node *node; struct sk_buff *skb2 = NULL; int err = -ESRCH; @@ -236,7 +235,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return -ENOMEM; rcu_read_lock(); - sk_for_each_rcu(sk, node, &net_pfkey->table) { + sk_for_each_rcu(sk, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dcfd64e83ab7..d36875f3427e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -221,10 +221,9 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) struct hlist_head *session_list = l2tp_session_id_hash_2(pn, session_id); struct l2tp_session *session; - struct hlist_node *walk; rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) { + hlist_for_each_entry_rcu(session, session_list, global_hlist) { if (session->session_id == session_id) { rcu_read_unlock_bh(); return session; @@ -253,7 +252,6 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn { struct hlist_head *session_list; struct l2tp_session *session; - struct hlist_node *walk; /* In L2TPv3, session_ids are unique over all tunnels and we * sometimes need to look them up before we know the @@ -264,7 +262,7 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn session_list = l2tp_session_id_hash(tunnel, session_id); read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, walk, session_list, hlist) { + hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -279,13 +277,12 @@ EXPORT_SYMBOL_GPL(l2tp_session_find); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; - struct hlist_node *walk; struct l2tp_session *session; int count = 0; read_lock_bh(&tunnel->hlist_lock); for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { - hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) { + hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { read_unlock_bh(&tunnel->hlist_lock); return session; @@ -306,12 +303,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); int hash; - struct hlist_node *walk; struct l2tp_session *session; rcu_read_lock_bh(); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { - hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) { + hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { rcu_read_unlock_bh(); return session; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index f7ac8f42fee2..7f41b7051269 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -49,10 +49,9 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk) static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip_bind_table) { + sk_for_each_bound(sk, &l2tp_ip_bind_table) { struct inet_sock *inet = inet_sk(sk); struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8ee4a86ae996..41f2f8126ebc 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -60,10 +60,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net, struct in6_addr *laddr, int dif, u32 tunnel_id) { - struct hlist_node *node; struct sock *sk; - sk_for_each_bound(sk, node, &l2tp_ip6_bind_table) { + sk_for_each_bound(sk, &l2tp_ip6_bind_table) { struct in6_addr *addr = inet6_rcv_saddr(sk); struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 7c5073badc73..78be45cda5c1 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -393,12 +393,11 @@ static void llc_sap_mcast(struct llc_sap *sap, { int i = 0, count = 256 / sizeof(struct sock *); struct sock *sk, *stack[count]; - struct hlist_node *node; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); - hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) { + hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d0dd11153a6c..1a8591b77a13 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -647,8 +647,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); - /* preallocate at least one entry */ - idr_pre_get(&local->ack_status_frames, GFP_KERNEL); sta_info_init(local); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 6b3c4e119c63..dc7c8df40c2c 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -72,9 +72,9 @@ static inline struct mesh_table *resize_dereference_mpp_paths(void) * it's used twice. So it is illegal to do * for_each_mesh_entry(rcu_dereference(...), ...) */ -#define for_each_mesh_entry(tbl, p, node, i) \ +#define for_each_mesh_entry(tbl, node, i) \ for (i = 0; i <= tbl->hash_mask; i++) \ - hlist_for_each_entry_rcu(node, p, &tbl->hash_buckets[i], list) + hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list) static struct mesh_table *mesh_table_alloc(int size_order) @@ -139,7 +139,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } if (free_leafs) { spin_lock_bh(&tbl->gates_lock); - hlist_for_each_entry_safe(gate, p, q, + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { hlist_del(&gate->list); kfree(gate); @@ -333,12 +333,11 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; - struct hlist_node *n; struct hlist_head *bucket; struct mpath_node *node; bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, n, bucket, list) { + hlist_for_each_entry_rcu(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) { @@ -389,11 +388,10 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct mesh_table *tbl = rcu_dereference(mesh_paths); struct mpath_node *node; - struct hlist_node *p; int i; int j = 0; - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { @@ -417,13 +415,12 @@ int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; struct mpath_node *gate, *new_gate; - struct hlist_node *n; int err; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list) + hlist_for_each_entry_rcu(gate, tbl->known_gates, list) if (gate->mpath == mpath) { err = -EEXIST; goto err_rcu; @@ -460,9 +457,9 @@ err_rcu: static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { struct mpath_node *gate; - struct hlist_node *p, *q; + struct hlist_node *q; - hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) { + hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) { if (gate->mpath != mpath) continue; spin_lock_bh(&tbl->gates_lock); @@ -504,7 +501,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -550,7 +546,7 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) spin_lock(&tbl->hashwlock[hash_idx]); err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -640,7 +636,6 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; - struct hlist_node *n; int grow = 0; int err = 0; u32 hash_idx; @@ -680,7 +675,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, spin_lock(&tbl->hashwlock[hash_idx]); err = -EEXIST; - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(dst, mpath->dst)) @@ -725,14 +720,13 @@ void mesh_plink_broken(struct sta_info *sta) static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && @@ -792,13 +786,12 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); read_lock_bh(&pathtbl_resize_lock); tbl = resize_dereference_mesh_paths(); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (rcu_dereference(mpath->next_hop) == sta) { spin_lock(&tbl->hashwlock[i]); @@ -815,11 +808,10 @@ static void table_flush_by_iface(struct mesh_table *tbl, { struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; WARN_ON(!rcu_read_lock_held()); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { mpath = node->mpath; if (mpath->sdata != sdata) continue; @@ -865,7 +857,6 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) struct mesh_path *mpath; struct mpath_node *node; struct hlist_head *bucket; - struct hlist_node *n; int hash_idx; int err = 0; @@ -875,7 +866,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) bucket = &tbl->hash_buckets[hash_idx]; spin_lock(&tbl->hashwlock[hash_idx]); - hlist_for_each_entry(node, n, bucket, list) { + hlist_for_each_entry(node, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && ether_addr_equal(addr, mpath->dst)) { @@ -920,7 +911,6 @@ void mesh_path_tx_pending(struct mesh_path *mpath) int mesh_path_send_to_gates(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; - struct hlist_node *n; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; struct mpath_node *gate = NULL; @@ -935,7 +925,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) if (!known_gates) return -EHOSTUNREACH; - hlist_for_each_entry_rcu(gate, n, known_gates, list) { + hlist_for_each_entry_rcu(gate, known_gates, list) { if (gate->mpath->sdata != sdata) continue; @@ -951,7 +941,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) } } - hlist_for_each_entry_rcu(gate, n, known_gates, list) + hlist_for_each_entry_rcu(gate, known_gates, list) if (gate->mpath->sdata == sdata) { mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); mesh_path_tx_pending(gate->mpath); @@ -1096,12 +1086,11 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; - struct hlist_node *p; int i; rcu_read_lock(); tbl = rcu_dereference(mesh_paths); - for_each_mesh_entry(tbl, p, node, i) { + for_each_mesh_entry(tbl, node, i) { if (node->mpath->sdata != sdata) continue; mpath = node->mpath; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5b9602b62405..de8548bf0a7f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2017,24 +2017,14 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, skb = skb_clone(skb, GFP_ATOMIC); if (skb) { unsigned long flags; - int id, r; + int id; spin_lock_irqsave(&local->ack_status_lock, flags); - r = idr_get_new_above(&local->ack_status_frames, - orig_skb, 1, &id); - if (r == -EAGAIN) { - idr_pre_get(&local->ack_status_frames, - GFP_ATOMIC); - r = idr_get_new_above(&local->ack_status_frames, - orig_skb, 1, &id); - } - if (WARN_ON(!id) || id > 0xffff) { - idr_remove(&local->ack_status_frames, id); - r = -ERANGE; - } + id = idr_alloc(&local->ack_status_frames, orig_skb, + 1, 0x10000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, flags); - if (!r) { + if (id >= 0) { info_id = id; info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; } else if (skb_shared(skb)) { diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9f00db7e03f2..704e514e02ab 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -259,13 +259,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -344,13 +343,12 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp; - struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (!ip_vs_conn_net_eq(cp, p->net)) continue; if (p->pe_data && p->pe->ct_match) { @@ -394,7 +392,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned int hash; struct ip_vs_conn *cp, *ret=NULL; - struct hlist_node *n; /* * Check for "full" addressed entries @@ -403,7 +400,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && @@ -953,11 +950,10 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *n; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; return cp; @@ -981,7 +977,6 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *e; struct hlist_head *l = iter->l; int idx; @@ -990,15 +985,15 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? */ - if ((e = cp->c_list.next)) - return hlist_entry(e, struct ip_vs_conn, c_list); + if (cp->c_list.next) + return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); - hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } @@ -1200,14 +1195,13 @@ void ip_vs_random_dropentry(struct net *net) */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - struct hlist_node *n; /* * Lock is actually needed in this loop. */ ct_write_lock_bh(hash); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1255,14 +1249,12 @@ static void ip_vs_conn_flush(struct net *net) flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - struct hlist_node *n; - /* * Lock is actually needed in this loop. */ ct_write_lock_bh(idx); - hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3921e5bc1235..8c10e3db3d9b 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -90,14 +90,13 @@ __nf_ct_expect_find(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) return i; @@ -130,14 +129,13 @@ nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; - struct hlist_node *n; unsigned int h; if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && nf_ct_zone(i->master) == zone) { @@ -172,13 +170,13 @@ void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; /* Optimization: most connection never expect any others. */ if (!help) return; - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); @@ -348,9 +346,8 @@ static void evict_oldest_expect(struct nf_conn *master, { struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_expect *exp, *last = NULL; - struct hlist_node *n; - hlist_for_each_entry(exp, n, &master_help->expectations, lnode) { + hlist_for_each_entry(exp, &master_help->expectations, lnode) { if (exp->class == new->class) last = exp; } @@ -369,7 +366,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conn_help *master_help = nfct_help(master); struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int h; int ret = 1; @@ -378,7 +375,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { + hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { if (del_timer(&i->timeout)) { nf_ct_unlink_expect(i); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 013cdf69fe29..a9740bd6fe54 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -116,14 +116,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; - struct hlist_node *n; unsigned int h; if (!nf_ct_helper_count) return NULL; h = helper_hash(tuple); - hlist_for_each_entry_rcu(helper, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) return helper; } @@ -134,11 +133,10 @@ struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; - struct hlist_node *n; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (!strcmp(h->name, name) && h->tuple.src.l3num == l3num && h->tuple.dst.protonum == protonum) @@ -357,7 +355,6 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { int ret = 0; struct nf_conntrack_helper *cur; - struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -365,7 +362,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); - hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && cur->tuple.src.l3num == me->tuple.src.l3num && cur->tuple.dst.protonum == me->tuple.dst.protonum) { @@ -386,13 +383,13 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; - const struct hlist_node *n, *next; + const struct hlist_node *next; const struct hlist_nulls_node *nn; unsigned int i; /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((rcu_dereference_protected( diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5d60e04f9679..9904b15f600e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2370,14 +2370,13 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct net *net = sock_net(skb->sk); struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], + hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -2510,7 +2509,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct hlist_node *n, *next; + struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; u16 zone; @@ -2557,7 +2556,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); @@ -2575,7 +2574,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* This basically means we have to flush everything*/ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, n, next, + hlist_for_each_entry_safe(exp, next, &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 069229d919b6..0e7d423324c3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -855,11 +855,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct, { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; int found = 0; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || exp->tuple.dst.protonum != proto || @@ -881,10 +881,10 @@ static void flush_expectations(struct nf_conn *ct, bool media) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; + struct hlist_node *next; spin_lock_bh(&nf_conntrack_lock); - hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { + hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; if (!del_timer(&exp->timeout)) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5f2f9109f461..8d5769c6d16e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -191,9 +191,8 @@ find_appropriate_src(struct net *net, u16 zone, unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; - const struct hlist_node *n; - hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 945950a8b1f1..a191b6db657e 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -282,7 +282,6 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - struct hlist_node *n; int ret = 0, i; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) @@ -296,7 +295,7 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, rcu_read_lock(); for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -452,13 +451,12 @@ static int nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conntrack_helper *cur, *last; - struct hlist_node *n; rcu_read_lock(); last = (struct nf_conntrack_helper *)cb->args[1]; for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(cur, n, + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[cb->args[0]], hnode) { /* skip non-userspace conntrack helpers. */ @@ -495,7 +493,6 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, { int ret = -ENOENT, i; struct nf_conntrack_helper *cur; - struct hlist_node *n; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; @@ -520,7 +517,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -568,7 +565,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; int i, j = 0, ret; @@ -585,7 +582,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) @@ -654,13 +651,13 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *n, *tmp; + struct hlist_node *tmp; int i; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); for (i=0; i<nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], hnode) { /* skip non-userspace conntrack helpers. */ if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 92fd8eca0d31..f248db572972 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -87,11 +87,10 @@ static struct nfulnl_instance * __instance_lookup(u_int16_t group_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfulnl_instance *inst; head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -717,11 +716,11 @@ nfulnl_rcv_nl_event(struct notifier_block *this, /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfulnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + hlist_for_each_entry_safe(inst, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && (n->portid == inst->peer_portid)) __instance_destroy(inst); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 3158d87b56a8..858fd52c1040 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -80,11 +80,10 @@ static struct nfqnl_instance * instance_lookup(u_int16_t queue_num) { struct hlist_head *head; - struct hlist_node *pos; struct nfqnl_instance *inst; head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, head, hlist) { if (inst->queue_num == queue_num) return inst; } @@ -583,11 +582,10 @@ nfqnl_dev_drop(int ifindex) rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_rcu(inst, tmp, head, hlist) + hlist_for_each_entry_rcu(inst, head, hlist) nfqnl_flush(inst, dev_cmp, ifindex); } @@ -627,11 +625,11 @@ nfqnl_rcv_nl_event(struct notifier_block *this, /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; + struct hlist_node *t2; struct nfqnl_instance *inst; struct hlist_head *head = &instance_table[i]; - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + hlist_for_each_entry_safe(inst, t2, head, hlist) { if ((n->net == &init_net) && (n->portid == inst->peer_portid)) __instance_destroy(inst); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index f264032b8c56..370adf622cef 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -43,12 +43,11 @@ static void xt_rateest_hash_insert(struct xt_rateest *est) struct xt_rateest *xt_rateest_lookup(const char *name) { struct xt_rateest *est; - struct hlist_node *n; unsigned int h; h = xt_rateest_hash(name); mutex_lock(&xt_rateest_mutex); - hlist_for_each_entry(est, n, &rateest_hash[h], list) { + hlist_for_each_entry(est, &rateest_hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; mutex_unlock(&xt_rateest_mutex); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70b5591a2586..c40b2695633b 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -101,7 +101,7 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct nf_conn *found_ct; struct hlist_head *hash; bool addit = true; @@ -115,7 +115,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - hlist_for_each_entry_safe(conn, pos, n, hash, node) { + hlist_for_each_entry_safe(conn, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -258,14 +258,14 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct hlist_node *pos, *n; + struct hlist_node *n; struct hlist_head *hash = info->data->iphash; unsigned int i; nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_for_each_entry_safe(conn, n, &hash[i], node) { hlist_del(&conn->node); kfree(conn); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 98218c896d2e..f330e8beaf69 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -141,11 +141,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst) { struct dsthash_ent *ent; - struct hlist_node *pos; u_int32_t hash = hash_dst(ht, dst); if (!hlist_empty(&ht->hash[hash])) { - hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) + hlist_for_each_entry_rcu(ent, &ht->hash[hash], node) if (dst_cmp(ent, dst)) { spin_lock(&ent->lock); return ent; @@ -297,8 +296,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, spin_lock_bh(&ht->lock); for (i = 0; i < ht->cfg.size; i++) { struct dsthash_ent *dh; - struct hlist_node *pos, *n; - hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { + struct hlist_node *n; + hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { if ((*select)(ht, dh)) dsthash_free(ht, dh); } @@ -343,9 +342,8 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->pde->name) && hinfo->family == family) { hinfo->use++; @@ -821,10 +819,9 @@ static int dl_seq_show(struct seq_file *s, void *v) struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; - struct hlist_node *pos; if (!hlist_empty(&htable->hash[*bucket])) { - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) + hlist_for_each_entry(ent, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) return -1; } @@ -877,7 +874,6 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct hlist_node *pos; struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); @@ -890,7 +886,7 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net) if (pde == NULL) pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) + hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) remove_proc_entry(hinfo->pde->name, pde); hashlimit_net->ipt_hashlimit = NULL; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8097b4f3ead4..1e3fd5bfcd86 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -248,11 +248,10 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; - struct hlist_node *node; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); - sk_for_each(sk, node, head) { + sk_for_each(sk, head) { if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; @@ -312,9 +311,9 @@ static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) for (i = 0; i <= omask; i++) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; - sk_for_each_safe(sk, node, tmp, &otable[i]) + sk_for_each_safe(sk, tmp, &otable[i]) __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } @@ -344,7 +343,6 @@ static void netlink_update_listeners(struct sock *sk) { struct netlink_table *tbl = &nl_table[sk->sk_protocol]; - struct hlist_node *node; unsigned long mask; unsigned int i; struct listeners *listeners; @@ -355,7 +353,7 @@ netlink_update_listeners(struct sock *sk) for (i = 0; i < NLGRPLONGS(tbl->groups); i++) { mask = 0; - sk_for_each_bound(sk, node, &tbl->mc_list) { + sk_for_each_bound(sk, &tbl->mc_list) { if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) mask |= nlk_sk(sk)->groups[i]; } @@ -371,18 +369,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; - struct hlist_node *node; int len; netlink_table_grab(); head = nl_portid_hashfn(hash, portid); len = 0; - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } - if (node) + if (osk) goto err; err = -EBUSY; @@ -575,7 +572,6 @@ static int netlink_autobind(struct socket *sock) struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; - struct hlist_node *node; s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; @@ -584,7 +580,7 @@ retry: cond_resched(); netlink_table_grab(); head = nl_portid_hashfn(hash, portid); - sk_for_each(osk, node, head) { + sk_for_each(osk, head) { if (!net_eq(sock_net(osk), net)) continue; if (nlk_sk(osk)->portid == portid) { @@ -1101,7 +1097,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid { struct net *net = sock_net(ssk); struct netlink_broadcast_data info; - struct hlist_node *node; struct sock *sk; skb = netlink_trim(skb, allocation); @@ -1124,7 +1119,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid netlink_lock_table(); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); consume_skb(skb); @@ -1200,7 +1195,6 @@ out: int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; - struct hlist_node *node; struct sock *sk; int ret = 0; @@ -1212,7 +1206,7 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) read_lock(&nl_table_lock); - sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) + sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); read_unlock(&nl_table_lock); @@ -1676,10 +1670,9 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { struct sock *sk; - struct hlist_node *node; struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; - sk_for_each_bound(sk, node, &tbl->mc_list) + sk_for_each_bound(sk, &tbl->mc_list) netlink_update_socket_mc(nlk_sk(sk), group, 0); } @@ -1974,14 +1967,13 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) struct nl_seq_iter *iter = seq->private; int i, j; struct sock *s; - struct hlist_node *node; loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { - sk_for_each(s, node, &hash->table[j]) { + sk_for_each(s, &hash->table[j]) { if (sock_net(s) != seq_file_net(seq)) continue; if (off == pos) { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 297b07a029de..d1fa1d9ffd2e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -104,10 +104,9 @@ static void nr_remove_socket(struct sock *sk) static void nr_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (nr_sk(s)->device == dev) nr_disconnect(s, ENETUNREACH); spin_unlock_bh(&nr_list_lock); @@ -149,10 +148,9 @@ static void nr_insert_socket(struct sock *sk) static struct sock *nr_find_listener(ax25_address *addr) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) + sk_for_each(s, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && s->sk_state == TCP_LISTEN) { bh_lock_sock(s); @@ -170,10 +168,9 @@ found: static struct sock *nr_find_socket(unsigned char index, unsigned char id) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->my_index == index && nr->my_id == id) { @@ -194,10 +191,9 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id, ax25_address *dest) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&nr_list_lock); - sk_for_each(s, node, &nr_list) { + sk_for_each(s, &nr_list) { struct nr_sock *nr = nr_sk(s); if (nr->your_index == index && nr->your_id == id && diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 70ffff76a967..b976d5eff2de 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -49,10 +49,9 @@ static struct nr_node *nr_node_get(ax25_address *callsign) { struct nr_node *found = NULL; struct nr_node *nr_node; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) + nr_node_for_each(nr_node, &nr_node_list) if (ax25cmp(callsign, &nr_node->callsign) == 0) { nr_node_hold(nr_node); found = nr_node; @@ -67,10 +66,9 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, { struct nr_neigh *found = NULL; struct nr_neigh *nr_neigh; - struct hlist_node *node; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(nr_neigh, node, &nr_neigh_list) + nr_neigh_for_each(nr_neigh, &nr_neigh_list) if (ax25cmp(callsign, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) { nr_neigh_hold(nr_neigh); @@ -114,10 +112,9 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, */ if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) { struct nr_node *nr_nodet; - struct hlist_node *node; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_nodet, node, &nr_node_list) { + nr_node_for_each(nr_nodet, &nr_node_list) { nr_node_lock(nr_nodet); for (i = 0; i < nr_nodet->count; i++) if (nr_nodet->routes[i].neighbour == nr_neigh) @@ -485,11 +482,11 @@ static int nr_dec_obs(void) { struct nr_neigh *nr_neigh; struct nr_node *s; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; int i; spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(s, node, nodet, &nr_node_list) { + nr_node_for_each_safe(s, nodet, &nr_node_list) { nr_node_lock(s); for (i = 0; i < s->count; i++) { switch (s->routes[i].obs_count) { @@ -540,15 +537,15 @@ static int nr_dec_obs(void) void nr_rt_device_down(struct net_device *dev) { struct nr_neigh *s; - struct hlist_node *node, *nodet, *node2, *node2t; + struct hlist_node *nodet, *node2t; struct nr_node *t; int i; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { if (s->dev == dev) { spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node2, node2t, &nr_node_list) { + nr_node_for_each_safe(t, node2t, &nr_node_list) { nr_node_lock(t); for (i = 0; i < t->count; i++) { if (t->routes[i].neighbour == s) { @@ -737,11 +734,10 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg) void nr_link_failed(ax25_cb *ax25, int reason) { struct nr_neigh *s, *nr_neigh = NULL; - struct hlist_node *node; struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each(s, node, &nr_neigh_list) { + nr_neigh_for_each(s, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; @@ -761,7 +757,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) return; } spin_lock_bh(&nr_node_list_lock); - nr_node_for_each(nr_node, node, &nr_node_list) { + nr_node_for_each(nr_node, &nr_node_list) { nr_node_lock(nr_node); if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) @@ -1013,16 +1009,16 @@ void __exit nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; - struct hlist_node *node, *nodet; + struct hlist_node *nodet; spin_lock_bh(&nr_neigh_list_lock); spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node, nodet, &nr_node_list) { + nr_node_for_each_safe(t, nodet, &nr_node_list) { nr_node_lock(t); nr_remove_node_locked(t); nr_node_unlock(t); } - nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) { + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { while(s->count) { s->count--; nr_neigh_put(s); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 746f5a2f9804..7f8266dd14cb 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -71,14 +71,14 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) { struct sock *sk; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; struct nfc_llcp_sock *llcp_sock; skb_queue_purge(&local->tx_queue); write_lock(&local->sockets.lock); - sk_for_each_safe(sk, node, tmp, &local->sockets.head) { + sk_for_each_safe(sk, tmp, &local->sockets.head) { llcp_sock = nfc_llcp_sock(sk); bh_lock_sock(sk); @@ -171,7 +171,6 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, u8 ssap, u8 dsap) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("ssap dsap %d %d\n", ssap, dsap); @@ -183,7 +182,7 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { @@ -272,7 +271,6 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, u8 *sn, size_t sn_len) { struct sock *sk; - struct hlist_node *node; struct nfc_llcp_sock *llcp_sock, *tmp_sock; pr_debug("sn %zd %p\n", sn_len, sn); @@ -284,7 +282,7 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, llcp_sock = NULL; - sk_for_each(sk, node, &local->sockets.head) { + sk_for_each(sk, &local->sockets.head) { tmp_sock = nfc_llcp_sock(sk); pr_debug("llcp sock %p\n", tmp_sock); @@ -601,14 +599,13 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu) void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction) { - struct hlist_node *node; struct sk_buff *skb_copy = NULL, *nskb; struct sock *sk; u8 *data; read_lock(&local->raw_sockets.lock); - sk_for_each(sk, node, &local->raw_sockets.head) { + sk_for_each(sk, &local->raw_sockets.head) { if (sk->sk_state != LLCP_BOUND) continue; @@ -697,11 +694,10 @@ static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - struct hlist_node *node; read_lock(&local->connecting_sockets.lock); - sk_for_each(sk, node, &local->connecting_sockets.head) { + sk_for_each(sk, &local->connecting_sockets.head) { llcp_sock = nfc_llcp_sock(sk); if (llcp_sock->ssap == ssap) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9dc537df46c4..e87a26506dba 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -158,11 +158,10 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; - struct hlist_node *n; struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node) { if (vport->port_no == port_no) return vport; } @@ -1386,9 +1385,9 @@ static void __dp_destroy(struct datapath *dp) for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - struct hlist_node *node, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node) + hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) if (vport->port_no != OVSP_LOCAL) ovs_dp_detach_port(vport); } @@ -1825,10 +1824,9 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; - struct hlist_node *n; j = 0; - hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).portid, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294cebc4f2..20605ecf100b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -299,10 +299,10 @@ void ovs_flow_tbl_destroy(struct flow_table *table) for (i = 0; i < table->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *node, *n; + struct hlist_node *n; int ver = table->node_ver; - hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) { + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { hlist_del_rcu(&flow->hash_node[ver]); ovs_flow_free(flow); } @@ -332,7 +332,6 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; int ver; int i; @@ -340,7 +339,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la while (*bucket < table->n_buckets) { i = 0; head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { if (i < *last) { i++; continue; @@ -367,11 +366,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new for (i = 0; i < old->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head; - struct hlist_node *n; head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, n, head, hash_node[old_ver]) + hlist_for_each_entry(flow, head, hash_node[old_ver]) ovs_flow_tbl_insert(new, flow); } old->keep_flows = true; @@ -766,14 +764,13 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, struct sw_flow_key *key, int key_len) { struct sw_flow *flow; - struct hlist_node *n; struct hlist_head *head; u32 hash; hash = ovs_flow_hash(key, key_len); head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) { + hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->hash == hash && !memcmp(&flow->key, key, key_len)) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0bedbac4..ba717cc038b3 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -86,9 +86,8 @@ struct vport *ovs_vport_locate(struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - struct hlist_node *node; - hlist_for_each_entry_rcu(vport, node, bucket, hash_node) + hlist_for_each_entry_rcu(vport, bucket, hash_node) if (!strcmp(name, vport->ops->get_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c7bfeff10767..1d6793dbfbae 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3263,12 +3263,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) { struct sock *sk; - struct hlist_node *node; struct net_device *dev = data; struct net *net = dev_net(dev); rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + sk_for_each_rcu(sk, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { diff --git a/net/packet/diag.c b/net/packet/diag.c index 8db6e21c46bd..d3fcd1ebef7e 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -172,13 +172,12 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; - struct hlist_node *node; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); mutex_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + sk_for_each(sk, &net->packet.sklist) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 576f22c9c76e..e77411735de8 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -640,11 +640,10 @@ static struct sock *pep_find_pipe(const struct hlist_head *hlist, const struct sockaddr_pn *dst, u8 pipe_handle) { - struct hlist_node *node; struct sock *sknode; u16 dobj = pn_sockaddr_get_object(dst); - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct pep_sock *pnnode = pep_sk(sknode); /* Ports match, but addresses might not: */ diff --git a/net/phonet/socket.c b/net/phonet/socket.c index b7e982782255..1afd1381cdc7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -76,7 +76,6 @@ static struct hlist_head *pn_hash_list(u16 obj) */ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) { - struct hlist_node *node; struct sock *sknode; struct sock *rval = NULL; u16 obj = pn_sockaddr_get_object(spn); @@ -84,7 +83,7 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) struct hlist_head *hlist = pn_hash_list(obj); rcu_read_lock(); - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ @@ -120,10 +119,9 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) rcu_read_lock(); for (h = 0; h < PN_HASHSIZE; h++) { - struct hlist_node *node; struct sock *sknode; - sk_for_each(sknode, node, hlist) { + sk_for_each(sknode, hlist) { struct sk_buff *clone; if (!net_eq(sock_net(sknode), net)) @@ -543,12 +541,11 @@ static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); struct hlist_head *hlist = pnsocks.hlist; - struct hlist_node *node; struct sock *sknode; unsigned int h; for (h = 0; h < PN_HASHSIZE; h++) { - sk_for_each_rcu(sknode, node, hlist) { + sk_for_each_rcu(sknode, hlist) { if (!net_eq(net, sock_net(sknode))) continue; if (!pos) diff --git a/net/rds/bind.c b/net/rds/bind.c index 637bde56c9db..b5ad65a0067e 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -52,13 +52,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, struct rds_sock *insert) { struct rds_sock *rs; - struct hlist_node *node; struct hlist_head *head = hash_to_bucket(addr, port); u64 cmp; u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); rcu_read_lock(); - hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) { + hlist_for_each_entry_rcu(rs, head, rs_bound_node) { cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | be16_to_cpu(rs->rs_bound_port); diff --git a/net/rds/connection.c b/net/rds/connection.c index 9e07c756d1f9..642ad42c416b 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -69,9 +69,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head, struct rds_transport *trans) { struct rds_connection *conn, *ret = NULL; - struct hlist_node *pos; - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (conn->c_faddr == faddr && conn->c_laddr == laddr && conn->c_trans == trans) { ret = conn; @@ -376,7 +375,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, int want_send) { struct hlist_head *head; - struct hlist_node *pos; struct list_head *list; struct rds_connection *conn; struct rds_message *rm; @@ -390,7 +388,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { if (want_send) list = &conn->c_send_queue; else @@ -439,7 +437,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, { uint64_t buffer[(item_len + 7) / 8]; struct hlist_head *head; - struct hlist_node *pos; struct rds_connection *conn; size_t i; @@ -450,7 +447,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); i++, head++) { - hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) { + hlist_for_each_entry_rcu(conn, head, c_hash_node) { /* XXX no c_lock usage.. */ if (!visitor(conn, buffer)) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index b768fe9d5e7a..cf68e6e4054a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -165,10 +165,9 @@ static void rose_remove_socket(struct sock *sk) void rose_kill_by_neigh(struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->neighbour == neigh) { @@ -186,10 +185,9 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) static void rose_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->device == dev) { @@ -246,10 +244,9 @@ static void rose_insert_socket(struct sock *sk) static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -258,7 +255,7 @@ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) goto found; } - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && @@ -278,10 +275,9 @@ found: struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) { struct sock *s; - struct hlist_node *node; spin_lock_bh(&rose_list_lock); - sk_for_each(s, node, &rose_list) { + sk_for_each(s, &rose_list) { struct rose_sock *rose = rose_sk(s); if (rose->lci == lci && rose->neighbour == neigh) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a181b484812a..c297e2a8e2a1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -545,7 +545,7 @@ static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; - struct hlist_node *n, *next; + struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; @@ -564,7 +564,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) sch_tree_lock(sch); for (i = 0; i < osize; i++) { - hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) { + hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 0e19948470b8..13aa47aa2ffb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1041,14 +1041,13 @@ static void cbq_adjust_levels(struct cbq_class *this) static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) { struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (q->quanta[prio] == 0) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of * arithmetic overflows! */ @@ -1087,10 +1086,9 @@ static void cbq_sync_defmap(struct cbq_class *cl) continue; for (h = 0; h < q->clhash.hashsize; h++) { - struct hlist_node *n; struct cbq_class *c; - hlist_for_each_entry(c, n, &q->clhash.hash[h], + hlist_for_each_entry(c, &q->clhash.hash[h], common.hnode) { if (c->split == split && c->level < level && c->defmap & (1<<i)) { @@ -1210,7 +1208,6 @@ cbq_reset(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; int prio; unsigned int h; @@ -1228,7 +1225,7 @@ cbq_reset(struct Qdisc *sch) q->active[prio] = NULL; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { qdisc_reset(cl->q); cl->next_alive = NULL; @@ -1697,7 +1694,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) static void cbq_destroy(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct cbq_class *cl; unsigned int h; @@ -1710,11 +1707,11 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], common.hnode) cbq_destroy_class(sch, cl); } @@ -2013,14 +2010,13 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; - struct hlist_node *n; unsigned int h; if (arg->stop) return; for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 71e50c80315f..759b308d1a8d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -293,14 +293,13 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -451,11 +450,10 @@ static void drr_reset_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) list_del(&cl->alist); qdisc_reset(cl->qdisc); @@ -468,13 +466,13 @@ static void drr_destroy_qdisc(struct Qdisc *sch) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) drr_destroy_class(sch, cl); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6c2ec4510540..9facea03faeb 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1389,7 +1389,6 @@ static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n; struct hfsc_class *cl; unsigned int i; @@ -1397,7 +1396,7 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { if (arg->count < arg->skip) { arg->count++; @@ -1523,11 +1522,10 @@ hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } q->eligible = RB_ROOT; @@ -1540,16 +1538,16 @@ static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct hfsc_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], cl_common.hnode) hfsc_destroy_class(sch, cl); } @@ -1564,12 +1562,11 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; struct hfsc_class *cl; - struct hlist_node *n; unsigned int i; sch->qstats.backlog = 0; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) sch->qstats.backlog += cl->qdisc->qstats.backlog; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 03c2692ca01e..571f1d211f4d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -949,11 +949,10 @@ static void htb_reset(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) memset(&cl->un.inner, 0, sizeof(cl->un.inner)); else { @@ -1218,7 +1217,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *n, *next; + struct hlist_node *next; struct htb_class *cl; unsigned int i; @@ -1232,11 +1231,11 @@ static void htb_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) tcf_destroy_chain(&cl->filter_list); } for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) htb_destroy_class(sch, cl); } @@ -1516,14 +1515,13 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6ed37652a4c3..e9a77f621c3d 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -276,9 +276,8 @@ static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q, u32 lmax, u32 weight) { struct qfq_aggregate *agg; - struct hlist_node *n; - hlist_for_each_entry(agg, n, &q->nonfull_aggs, nonfull_next) + hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next) if (agg->lmax == lmax && agg->class_weight == weight) return agg; @@ -670,14 +669,13 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (arg->count < arg->skip) { arg->count++; continue; @@ -1376,11 +1374,10 @@ static unsigned int qfq_drop_from_slot(struct qfq_sched *q, struct hlist_head *slot) { struct qfq_aggregate *agg; - struct hlist_node *n; struct qfq_class *cl; unsigned int len; - hlist_for_each_entry(agg, n, slot, next) { + hlist_for_each_entry(agg, slot, next) { list_for_each_entry(cl, &agg->active, alist) { if (!cl->qdisc->ops->drop) @@ -1459,11 +1456,10 @@ static void qfq_reset_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen > 0) qfq_deactivate_class(q, cl); @@ -1477,13 +1473,13 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; - struct hlist_node *n, *next; + struct hlist_node *next; unsigned int i; tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { qfq_destroy_class(sch, cl); } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2f95f5a5145d..43cd0dd9149d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1591,32 +1591,31 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - int assoc_id; - int error = 0; + bool preload = gfp & __GFP_WAIT; + int ret; /* If the id is already assigned, keep it. */ if (asoc->assoc_id) - return error; -retry: - if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) - return -ENOMEM; + return 0; + if (preload) + idr_preload(gfp); spin_lock_bh(&sctp_assocs_id_lock); - error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, - idr_low, &assoc_id); - if (!error) { - idr_low = assoc_id + 1; + /* 0 is not a valid id, idr_low is always >= 1 */ + ret = idr_alloc(&sctp_assocs_id, asoc, idr_low, 0, GFP_NOWAIT); + if (ret >= 0) { + idr_low = ret + 1; if (idr_low == INT_MAX) idr_low = 1; } spin_unlock_bh(&sctp_assocs_id_lock); - if (error == -EAGAIN) - goto retry; - else if (error) - return error; + if (preload) + idr_preload_end(); + if (ret < 0) + return ret; - asoc->assoc_id = (sctp_assoc_t) assoc_id; - return error; + asoc->assoc_id = (sctp_assoc_t)ret; + return 0; } /* Free the ASCONF queue */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 73aad3d16a45..2b3ef03c6098 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -332,7 +332,6 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( struct sctp_transport *t = NULL; struct sctp_hashbucket *head; struct sctp_ep_common *epb; - struct hlist_node *node; int hash; int rport; @@ -350,7 +349,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( rport); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { tmp = sctp_assoc(epb); if (tmp->ep != ep || rport != tmp->peer.port) continue; diff --git a/net/sctp/input.c b/net/sctp/input.c index 965bbbbe48d4..4b2c83146aa7 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -784,13 +784,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; - struct hlist_node *node; int hash; hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); if (sctp_endpoint_is_match(ep, net, laddr)) goto hit; @@ -876,7 +875,6 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_ep_common *epb; struct sctp_association *asoc; struct sctp_transport *transport; - struct hlist_node *node; int hash; /* Optimize here for direct hit, only listening connections can @@ -886,7 +884,7 @@ static struct sctp_association *__sctp_lookup_association( ntohs(peer->v4.sin_port)); head = &sctp_assoc_hashtable[hash]; read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { asoc = sctp_assoc(epb); transport = sctp_assoc_is_match(asoc, net, local, peer); if (transport) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 8c19e97262ca..ab3bba8cb0a8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -213,7 +213,6 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_ep_hashsize) @@ -222,7 +221,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) head = &sctp_ep_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -321,7 +320,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_association *assoc; struct sock *sk; - struct hlist_node *node; int hash = *(loff_t *)v; if (hash >= sctp_assoc_hashsize) @@ -330,7 +328,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) head = &sctp_assoc_hashtable[hash]; sctp_local_bh_disable(); read_lock(&head->lock); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) @@ -436,7 +434,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; - struct hlist_node *node; struct sctp_transport *tsp; int hash = *(loff_t *)v; @@ -447,7 +444,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); rcu_read_lock(); - sctp_for_each_hentry(epb, node, &head->chain) { + sctp_for_each_hentry(epb, &head->chain) { if (!net_eq(sock_net(epb->sk), seq_file_net(seq))) continue; assoc = sctp_assoc(epb); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cedd9bf67b8c..c99458df3f3f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5882,8 +5882,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_bind_hashbucket *head; /* hash list */ - struct sctp_bind_bucket *pp; /* hash list port iterator */ - struct hlist_node *node; + struct sctp_bind_bucket *pp; unsigned short snum; int ret; @@ -5910,7 +5909,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) index = sctp_phashfn(sock_net(sk), rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) + sctp_for_each_hentry(pp, &head->chain) if ((pp->port == rover) && net_eq(sock_net(sk), pp->net)) goto next; @@ -5938,7 +5937,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) */ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)]; sctp_spin_lock(&head->lock); - sctp_for_each_hentry(pp, node, &head->chain) { + sctp_for_each_hentry(pp, &head->chain) { if ((pp->port == snum) && net_eq(pp->net, sock_net(sk))) goto pp_found; } @@ -5970,7 +5969,7 @@ pp_found: * that this port/socket (sk) combination are already * in an endpoint. */ - sk_for_each_bound(sk2, node, &pp->owner) { + sk_for_each_bound(sk2, &pp->owner) { struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d11418f97f1f..a622ad64acd8 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -17,7 +17,8 @@ */ #include <net/ipv6.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> +#include <linux/sunrpc/msg_prot.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 392adc41e2e5..f5294047df77 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -407,7 +407,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, { LIST_HEAD(free); struct rpc_cred_cache *cache = auth->au_credcache; - struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; unsigned int nr; @@ -415,7 +414,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits); rcu_read_lock(); - hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; spin_lock(&cache->lock); @@ -439,7 +438,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, } spin_lock(&cache->lock); - hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { + hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; cred = get_rpccred(entry); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 107c4528654f..88edec929d73 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + /* Trim off the checksum blob */ + xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index ecd1d58bf611..f7d34e7b6f81 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -182,12 +182,6 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); -} - - static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) { @@ -275,7 +269,7 @@ static struct cache_detail rsi_cache_template = { .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, @@ -825,13 +819,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) * The server uses base of head iovec as read pointer, while the * client uses separate pointer. */ static int -unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { int stat = -EINVAL; u32 integ_len, maj_stat; struct xdr_netobj mic; struct xdr_buf integ_buf; + /* Did we already verify the signature on the original pass through? */ + if (rqstp->rq_deferred) + return 0; + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) return stat; @@ -854,6 +852,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; + /* trim off the mic at the end before returning */ + xdr_buf_trim(buf, mic.len + 4); stat = 0; out: kfree(mic.data); @@ -1198,7 +1198,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - if (unwrap_integ_data(&rqstp->rq_arg, + if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; break; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f3897d10f649..25d58e766014 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -196,9 +196,9 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { - if (!cd->cache_upcall) - return -EINVAL; - return cd->cache_upcall(cd, h); + if (cd->cache_upcall) + return cd->cache_upcall(cd, h); + return sunrpc_cache_pipe_upcall(cd, h); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -670,13 +670,13 @@ static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; - struct hlist_node *lp, *tmp; + struct hlist_node *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); - hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash) + hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash) if (dreq->item == item) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); @@ -750,6 +750,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; +static int cache_request(struct cache_detail *detail, + struct cache_request *crq) +{ + char *bp = crq->buf; + int len = PAGE_SIZE; + + detail->cache_request(detail, crq->item, &bp, &len); + if (len < 0) + return -EAGAIN; + return PAGE_SIZE - len; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { @@ -784,6 +796,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rq->readers++; spin_unlock(&queue_lock); + if (rq->len == 0) { + err = cache_request(cd, rq); + if (err < 0) + goto out; + rq->len = err; + } + if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); @@ -1140,17 +1159,14 @@ static bool cache_listeners_exist(struct cache_detail *detail) * * Each request is at most one page long. */ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; struct cache_request *crq; - char *bp; - int len; + + if (!detail->cache_request) + return -EINVAL; if (!cache_listeners_exist(detail)) { warn_no_listener(detail); @@ -1167,19 +1183,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, return -EAGAIN; } - bp = buf; len = PAGE_SIZE; - - cache_request(detail, h, &bp, &len); - - if (len < 0) { - kfree(buf); - kfree(crq); - return -EAGAIN; - } crq->q.reader = 0; crq->item = cache_get(h); crq->buf = buf; - crq->len = PAGE_SIZE - len; + crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); list_add_tail(&crq->q.list, &detail->queue); @@ -1605,7 +1612,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) if (p == NULL) goto out_nomem; - if (cd->cache_upcall || cd->cache_parse) { + if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->u.procfs.proc_ent, &cache_file_operations_procfs, cd); @@ -1614,7 +1621,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) goto out_nomem; } if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + p = proc_create_data("content", S_IFREG|S_IRUSR, cd->u.procfs.proc_ent, &content_file_operations_procfs, cd); cd->u.procfs.content_ent = p; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a9f7906c1a6a..d7a369e61085 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -33,6 +33,7 @@ #include <linux/rcupdate.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 795a0f4e920b..3df764dc330c 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -26,6 +26,7 @@ #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dbf12ac5ecb7..89a588b4478b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -515,15 +515,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_shutdown_net(struct svc_serv *serv, struct net *net) { - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ svc_close_net(serv, net); if (serv->sv_shutdown) @@ -1042,6 +1033,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ +#ifdef RPC_DEBUG static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1058,6 +1050,9 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) va_end(args); } +#else +static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} +#endif /* * Common routine for processing the RPC request. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b8e47fac7315..80a6640f329b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv) rqstp->rq_xprt = NULL; */ wake_up(&rqstp->rq_wait); - } + } else + pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); } } @@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; + pool->sp_task_pending = 0; } else { + if (pool->sp_task_pending) { + pool->sp_task_pending = 0; + spin_unlock_bh(&pool->sp_lock); + return ERR_PTR(-EAGAIN); + } /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); @@ -856,7 +863,6 @@ static void svc_age_temp_xprts(unsigned long closure) struct svc_serv *serv = (struct svc_serv *)closure; struct svc_xprt *xprt; struct list_head *le, *next; - LIST_HEAD(to_be_aged); dprintk("svc_age_temp_xprts\n"); @@ -877,25 +883,15 @@ static void svc_age_temp_xprts(unsigned long closure) if (atomic_read(&xprt->xpt_ref.refcount) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; - svc_xprt_get(xprt); - list_move(le, &to_be_aged); + list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_DETACHED, &xprt->xpt_flags); - } - spin_unlock_bh(&serv->sv_lock); - - while (!list_empty(&to_be_aged)) { - le = to_be_aged.next; - /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ - list_del_init(le); - xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ svc_xprt_enqueue(xprt); - svc_xprt_put(xprt); } + spin_unlock_bh(&serv->sv_lock); mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } @@ -959,21 +955,24 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + int ret = 0; spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; + ret++; set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_BUSY, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } spin_unlock(&serv->sv_lock); + return ret; } -static void svc_clear_pools(struct svc_serv *serv, struct net *net) +static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; @@ -988,42 +987,46 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) if (xprt->xpt_net != net) continue; list_del_init(&xprt->xpt_ready); + spin_unlock_bh(&pool->sp_lock); + return xprt; } spin_unlock_bh(&pool->sp_lock); } + return NULL; } -static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - LIST_HEAD(victims); - spin_lock(&serv->sv_lock); - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; - list_move(&xprt->xpt_list, &victims); - } - spin_unlock(&serv->sv_lock); - - list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + while ((xprt = svc_dequeue_net(serv, net))) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_delete_xprt(xprt); + } } +/* + * Server threads may still be running (especially in the case where the + * service is still running in other network namespaces). + * + * So we shut down sockets the same way we would on a running server, by + * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do + * the close. In the case there are no such other threads, + * threads running, svc_clean_up_xprts() does a simple version of a + * server's main event loop, and in the case where there are other + * threads, we may need to wait a little while and then check again to + * see if they're done. + */ void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(serv, &serv->sv_tempsocks, net); - svc_close_list(serv, &serv->sv_permsocks, net); + int delay = 0; - svc_clear_pools(serv, net); - /* - * At this point the sp_sockets lists will stay empty, since - * svc_xprt_enqueue will not add new entries without taking the - * sp_lock and checking XPT_BUSY. - */ - svc_clear_list(serv, &serv->sv_tempsocks, net); - svc_clear_list(serv, &serv->sv_permsocks, net); + while (svc_close_list(serv, &serv->sv_permsocks, net) + + svc_close_list(serv, &serv->sv_tempsocks, net)) { + + svc_clean_up_xprts(serv, net); + msleep(delay++); + } } /* diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 7963569fc04f..2af7b0cba43a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -138,13 +138,12 @@ auth_domain_lookup(char *name, struct auth_domain *new) { struct auth_domain *hp; struct hlist_head *head; - struct hlist_node *np; head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; spin_lock(&auth_domain_lock); - hlist_for_each_entry(hp, np, head, hash) { + hlist_for_each_entry(hp, head, hash) { if (strcmp(hp->name, name)==0) { kref_get(&hp->ref); spin_unlock(&auth_domain_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a1852e19ed0c..c3f9e1ef7f53 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -6,6 +6,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/addr.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/hash.h> @@ -17,7 +18,6 @@ #include <linux/user_namespace.h> #define RPCDBG_FACILITY RPCDBG_AUTH -#include <linux/sunrpc/clnt.h> #include "netns.h" @@ -157,11 +157,6 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); -} - static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -475,11 +470,6 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); -} - static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid); static int unix_gid_parse(struct cache_detail *cd, @@ -586,7 +576,7 @@ static struct cache_detail unix_gid_cache_template = { .hash_size = GID_HASHMAX, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -885,7 +875,7 @@ static struct cache_detail ip_map_cache_template = { .hash_size = IP_HASHMAX, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 56055632f151..75edcfad6e26 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d0074289708e..794312f22b9b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -51,6 +51,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/seq_file.h> +#include <linux/sunrpc/addr.h> #include "xprt_rdma.h" diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37cbda63f45c..c1d8476b7692 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -33,6 +33,7 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> @@ -1867,13 +1868,9 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, * @xprt: RPC transport to connect * @transport: socket transport to connect * @create_sock: function to create a socket of the correct type - * - * Invoked by a work queue tasklet. */ -static void xs_local_setup_socket(struct work_struct *work) +static int xs_local_setup_socket(struct sock_xprt *transport) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; @@ -1918,6 +1915,30 @@ out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); current->flags &= ~PF_FSTRANS; + return status; +} + +static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret; + + if (RPC_IS_ASYNC(task)) { + /* + * We want the AF_LOCAL connect to be resolved in the + * filesystem namespace of the process making the rpc + * call. Thus we connect synchronously. + * + * If we want to support asynchronous AF_LOCAL calls, + * we'll need to figure out how to pass a namespace to + * connect. + */ + rpc_exit(task, -ENOTCONN); + return; + } + ret = xs_local_setup_socket(transport); + if (ret && !RPC_IS_SOFTCONN(task)) + msleep_interruptible(15000); } #ifdef CONFIG_SUNRPC_SWAP @@ -2455,7 +2476,7 @@ static struct rpc_xprt_ops xs_local_ops = { .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, - .connect = xs_connect, + .connect = xs_local_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_local_send_request, @@ -2628,8 +2649,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) goto out_err; } xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_local_setup_socket); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); break; default: diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 46754779fd3d..24b167914311 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, static struct name_seq *nametbl_find_seq(u32 type) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *ns; seq_head = &table.types[hash(type)]; - hlist_for_each_entry(ns, seq_node, seq_head, ns_list) { + hlist_for_each_entry(ns, seq_head, ns_list) { if (ns->type == type) return ns; } @@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info, u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; - struct hlist_node *seq_node; struct name_seq *seq; int all_types; int ret = 0; @@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, lowbound, upbound, i); @@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info, ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_node, seq_head, ns_list) { + hlist_for_each_entry(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, depth, type, diff --git a/net/tipc/node.c b/net/tipc/node.c index 48f39dd3eae8..6e6c434872e8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr) struct tipc_node *tipc_node_find(u32 addr) { struct tipc_node *node; - struct hlist_node *pos; if (unlikely(!in_own_cluster_exact(addr))) return NULL; - hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) return node; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 87d284289012..51be64f163ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -263,9 +263,8 @@ static struct sock *__unix_find_socket_byname(struct net *net, int len, int type, unsigned int hash) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -298,10 +297,9 @@ static inline struct sock *unix_find_socket_byname(struct net *net, static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; - struct hlist_node *node; spin_lock(&unix_table_lock); - sk_for_each(s, node, + sk_for_each(s, &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->path.dentry; diff --git a/net/unix/diag.c b/net/unix/diag.c index 5ac19dc1d5e4..d591091603bf 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -192,10 +192,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) slot < ARRAY_SIZE(unix_socket_table); s_num = 0, slot++) { struct sock *sk; - struct hlist_node *node; num = 0; - sk_for_each(sk, node, &unix_socket_table[slot]) { + sk_for_each(sk, &unix_socket_table[slot]) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) @@ -226,9 +225,7 @@ static struct sock *unix_lookup_by_ino(int ino) spin_lock(&unix_table_lock); for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { - struct hlist_node *node; - - sk_for_each(sk, node, &unix_socket_table[i]) + sk_for_each(sk, &unix_socket_table[i]) if (ino == sock_i_ino(sk)) { sock_hold(sk); spin_unlock(&unix_table_lock); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a306bc66000e..37ca9694aabe 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -208,11 +208,10 @@ static void x25_remove_socket(struct sock *sk) static void x25_kill_by_device(struct net_device *dev) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev) x25_disconnect(s, ENETUNREACH, 0, 0); @@ -280,12 +279,11 @@ static struct sock *x25_find_listener(struct x25_address *addr, { struct sock *s; struct sock *next_best; - struct hlist_node *node; read_lock_bh(&x25_list_lock); next_best = NULL; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || !strcmp(addr->x25_addr, @@ -323,9 +321,8 @@ found: static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) { sock_hold(s); goto found; @@ -1782,11 +1779,10 @@ static struct notifier_block x25_dev_notifier = { void x25_kill_by_neigh(struct x25_neigh *nb) { struct sock *s; - struct hlist_node *node; write_lock_bh(&x25_list_lock); - sk_for_each(s, node, &x25_list) + sk_for_each(s, &x25_list) if (x25_sk(s)->neighbour == nb) x25_disconnect(s, ENETUNREACH, 0, 0); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5b47180986f8..167c67d46c6a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -379,27 +379,27 @@ static void xfrm_dst_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp, *entry0 = NULL; + struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; redo: - hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask); if (!entry0) { - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_head(&pol->bydst, ndsttable+h); h0 = h; } else { if (h != h0) continue; - hlist_del(entry); + hlist_del(&pol->bydst); hlist_add_after(entry0, &pol->bydst); } - entry0 = entry; + entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; @@ -411,10 +411,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_policy *pol; - hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); @@ -544,7 +544,6 @@ static u32 xfrm_gen_index(struct net *net, int dir) static u32 idx_generator; for (;;) { - struct hlist_node *entry; struct hlist_head *list; struct xfrm_policy *p; u32 idx; @@ -556,7 +555,7 @@ static u32 xfrm_gen_index(struct net *net, int dir) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; - hlist_for_each_entry(p, entry, list, byidx) { + hlist_for_each_entry(p, list, byidx) { if (p->index == idx) { found = 1; break; @@ -628,13 +627,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *pol; struct xfrm_policy *delpol; struct hlist_head *chain; - struct hlist_node *entry, *newpos; + struct hlist_node *newpos; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && xfrm_policy_mark_match(policy, pol) && @@ -691,13 +690,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = 0; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && @@ -729,7 +727,6 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, { struct xfrm_policy *pol, *ret; struct hlist_head *chain; - struct hlist_node *entry; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) @@ -739,7 +736,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, write_lock_bh(&xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; - hlist_for_each_entry(pol, entry, chain, byidx) { + hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); @@ -772,10 +769,9 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -789,7 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi } } for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -828,11 +824,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; - struct hlist_node *entry; int i; again1: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; @@ -852,7 +847,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: - hlist_for_each_entry(pol, entry, + hlist_for_each_entry(pol, net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) @@ -980,7 +975,6 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, int err; struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; @@ -992,7 +986,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1008,7 +1002,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -3041,13 +3035,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector u8 dir, u8 type) { struct xfrm_policy *pol, *ret = NULL; - struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; read_lock_bh(&xfrm_policy_lock); chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; @@ -3056,7 +3049,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector } } chain = &init_net.xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { + hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && pol->priority < priority) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ae01bdbcb294..2c341bdaf47c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -72,10 +72,10 @@ static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *nspitable, unsigned int nhashmask) { - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct xfrm_state *x; - hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + hlist_for_each_entry_safe(x, tmp, list, bydst) { unsigned int h; h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, @@ -368,14 +368,14 @@ static void xfrm_state_gc_task(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); wake_up(&net->xfrm.km_waitq); @@ -577,10 +577,9 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int i, err = 0; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -613,10 +612,9 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -685,9 +683,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -710,9 +707,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - struct hlist_node *entry; - hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -798,7 +794,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); unsigned int h, h_wildcard; - struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; int error = 0; @@ -810,7 +805,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, spin_lock_bh(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -826,7 +821,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -906,11 +901,10 @@ xfrm_stateonly_find(struct net *net, u32 mark, { unsigned int h; struct xfrm_state *rx = NULL, *x = NULL; - struct hlist_node *entry; spin_lock(&xfrm_state_lock); h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -972,12 +966,11 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) unsigned short family = xnew->props.family; u32 reqid = xnew->props.reqid; struct xfrm_state *x; - struct hlist_node *entry; unsigned int h; u32 mark = xnew->mark.v & xnew->mark.m; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && (mark & x->mark.m) == x->mark.v && @@ -1004,11 +997,10 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, const xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); - struct hlist_node *entry; struct xfrm_state *x; u32 mark = m->v & m->m; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1215,12 +1207,11 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) { unsigned int h; struct xfrm_state *x; - struct hlist_node *entry; if (m->reqid) { h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1237,7 +1228,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) } else { h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1466,10 +1457,9 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s int i; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index 06ba4a70bd4d..25f216a841d5 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -7,15 +7,15 @@ # # ========================================================================== -# called may set destination dir (when installing to asm/) -_dst := $(or $(destination-y),$(dst),$(obj)) - # generated header directory gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj))) kbuild-file := $(srctree)/$(obj)/Kbuild include $(kbuild-file) +# called may set destination dir (when installing to asm/) +_dst := $(or $(destination-y),$(dst),$(obj)) + old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild ifneq ($(wildcard $(old-kbuild-file)),) include $(old-kbuild-file) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index a1cb0222ebe6..cf82c832458f 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -66,10 +66,6 @@ modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) # Stop after building .o files if NOFINAL is set. Makes compile tests quicker _modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) -ifneq ($(KBUILD_BUILDHOST),$(ARCH)) - cross_build := 1 -endif - # Step 2), invoke modpost # Includes step 3,4 modpost = scripts/mod/modpost \ @@ -80,8 +76,7 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ - $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ - $(if $(cross_build),-c) + $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules cmd_modpost = $(modpost) -s diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 747bcd768da0..b28cc384a5bc 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2930,7 +2930,7 @@ sub process { my $var = $1; if ($var !~ /$Constant/ && $var =~ /[A-Z]\w*[a-z]|[a-z]\w*[A-Z]/ && - $var !~ /^Page[A-Z]/ && + $var !~ /"^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ && !defined $camelcase{$var}) { $camelcase{$var} = 1; WARN("CAMELCASE", diff --git a/scripts/coccicheck b/scripts/coccicheck index 1a49d1c7ecfe..85d31899ad98 100755 --- a/scripts/coccicheck +++ b/scripts/coccicheck @@ -2,6 +2,15 @@ SPATCH="`which ${SPATCH:=spatch}`" +# The verbosity may be set by the environmental parameter V= +# as for example with 'make V=1 coccicheck' + +if [ -n "$V" -a "$V" != "0" ]; then + VERBOSE=1 +else + VERBOSE=0 +fi + if [ "$C" = "1" -o "$C" = "2" ]; then ONLINE=1 @@ -46,6 +55,14 @@ if [ "$ONLINE" = "0" ] ; then echo '' fi +run_cmd() { + if [ $VERBOSE -ne 0 ] ; then + echo "Running: $@" + fi + eval $@ +} + + coccinelle () { COCCI="$1" @@ -55,7 +72,7 @@ coccinelle () { # # $SPATCH -D $MODE $FLAGS -parse_cocci $COCCI $OPT > /dev/null - if [ "$ONLINE" = "0" ] ; then + if [ $VERBOSE -ne 0 ] ; then FILE=`echo $COCCI | sed "s|$srctree/||"` @@ -91,15 +108,21 @@ coccinelle () { fi if [ "$MODE" = "chain" ] ; then - $SPATCH -D patch $FLAGS -sp_file $COCCI $OPT $OPTIONS || \ - $SPATCH -D report $FLAGS -sp_file $COCCI $OPT $OPTIONS -no_show_diff || \ - $SPATCH -D context $FLAGS -sp_file $COCCI $OPT $OPTIONS || \ - $SPATCH -D org $FLAGS -sp_file $COCCI $OPT $OPTIONS -no_show_diff || exit 1 + run_cmd $SPATCH -D patch \ + $FLAGS -sp_file $COCCI $OPT $OPTIONS || \ + run_cmd $SPATCH -D report \ + $FLAGS -sp_file $COCCI $OPT $OPTIONS -no_show_diff || \ + run_cmd $SPATCH -D context \ + $FLAGS -sp_file $COCCI $OPT $OPTIONS || \ + run_cmd $SPATCH -D org \ + $FLAGS -sp_file $COCCI $OPT $OPTIONS -no_show_diff || exit 1 elif [ "$MODE" = "rep+ctxt" ] ; then - $SPATCH -D report $FLAGS -sp_file $COCCI $OPT $OPTIONS -no_show_diff && \ - $SPATCH -D context $FLAGS -sp_file $COCCI $OPT $OPTIONS || exit 1 + run_cmd $SPATCH -D report \ + $FLAGS -sp_file $COCCI $OPT $OPTIONS -no_show_diff && \ + run_cmd $SPATCH -D context \ + $FLAGS -sp_file $COCCI $OPT $OPTIONS || exit 1 else - $SPATCH -D $MODE $FLAGS -sp_file $COCCI $OPT $OPTIONS || exit 1 + run_cmd $SPATCH -D $MODE $FLAGS -sp_file $COCCI $OPT $OPTIONS || exit 1 fi } diff --git a/scripts/coccinelle/misc/memcpy-assign.cocci b/scripts/coccinelle/misc/memcpy-assign.cocci new file mode 100644 index 000000000000..afd058be497f --- /dev/null +++ b/scripts/coccinelle/misc/memcpy-assign.cocci @@ -0,0 +1,103 @@ +// +// Replace memcpy with struct assignment. +// +// Confidence: High +// Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6. GPLv2. +// URL: http://coccinelle.lip6.fr/ +// Comments: +// Options: --no-includes --include-headers + +virtual patch +virtual report +virtual context +virtual org + +@r1 depends on !patch@ +identifier struct_name; +struct struct_name to; +struct struct_name from; +struct struct_name *top; +struct struct_name *fromp; +position p; +@@ +memcpy@p(\(&(to)\|top\), \(&(from)\|fromp\), \(sizeof(to)\|sizeof(from)\|sizeof(struct struct_name)\|sizeof(*top)\|sizeof(*fromp)\)) + +@script:python depends on report@ +p << r1.p; +@@ +coccilib.report.print_report(p[0],"Replace memcpy with struct assignment") + +@depends on context@ +position r1.p; +@@ +*memcpy@p(...); + +@script:python depends on org@ +p << r1.p; +@@ +cocci.print_main("Replace memcpy with struct assignment",p) + +@depends on patch@ +identifier struct_name; +struct struct_name to; +struct struct_name from; +@@ +( +-memcpy(&(to), &(from), sizeof(to)); ++to = from; +| +-memcpy(&(to), &(from), sizeof(from)); ++to = from; +| +-memcpy(&(to), &(from), sizeof(struct struct_name)); ++to = from; +) + +@depends on patch@ +identifier struct_name; +struct struct_name to; +struct struct_name *from; +@@ +( +-memcpy(&(to), from, sizeof(to)); ++to = *from; +| +-memcpy(&(to), from, sizeof(*from)); ++to = *from; +| +-memcpy(&(to), from, sizeof(struct struct_name)); ++to = *from; +) + +@depends on patch@ +identifier struct_name; +struct struct_name *to; +struct struct_name from; +@@ +( +-memcpy(to, &(from), sizeof(*to)); ++ *to = from; +| +-memcpy(to, &(from), sizeof(from)); ++ *to = from; +| +-memcpy(to, &(from), sizeof(struct struct_name)); ++ *to = from; +) + +@depends on patch@ +identifier struct_name; +struct struct_name *to; +struct struct_name *from; +@@ +( +-memcpy(to, from, sizeof(*to)); ++ *to = *from; +| +-memcpy(to, from, sizeof(*from)); ++ *to = *from; +| +-memcpy(to, from, sizeof(struct struct_name)); ++ *to = *from; +) + diff --git a/scripts/coccinelle/misc/orplus.cocci b/scripts/coccinelle/misc/orplus.cocci new file mode 100644 index 000000000000..4a28cef1484e --- /dev/null +++ b/scripts/coccinelle/misc/orplus.cocci @@ -0,0 +1,55 @@ +/// Check for constants that are added but are used elsewhere as bitmasks +/// The results should be checked manually to ensure that the nonzero +/// bits in the two constants are actually disjoint. +/// +// Confidence: Moderate +// Copyright: (C) 2013 Julia Lawall, INRIA/LIP6. GPLv2. +// Copyright: (C) 2013 Gilles Muller, INRIA/LIP6. GPLv2. +// URL: http://coccinelle.lip6.fr/ +// Comments: +// Options: -no_includes -include_headers + +virtual org +virtual report +virtual context + +@r@ +constant c; +identifier i; +expression e; +@@ + +( +e | c@i +| +e & c@i +| +e |= c@i +| +e &= c@i +) + +@s@ +constant r.c,c1; +identifier i1; +position p; +@@ + +( + c1 + c - 1 +| +*c1@i1 +@p c +) + +@script:python depends on org@ +p << s.p; +@@ + +cocci.print_main("sum of probable bitmasks, consider |",p) + +@script:python depends on report@ +p << s.p; +@@ + +msg = "WARNING: sum of probable bitmasks, consider |" +coccilib.report.print_report(p[0],msg) diff --git a/scripts/coccinelle/misc/semicolon.cocci b/scripts/coccinelle/misc/semicolon.cocci new file mode 100644 index 000000000000..a47eba2edc9e --- /dev/null +++ b/scripts/coccinelle/misc/semicolon.cocci @@ -0,0 +1,83 @@ +/// +/// Removes unneeded semicolon. +/// +// Confidence: Moderate +// Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6. GPLv2. +// URL: http://coccinelle.lip6.fr/ +// Comments: Some false positives on empty default cases in switch statements. +// Options: --no-includes --include-headers + +virtual patch +virtual report +virtual context +virtual org + +@r_default@ +position p; +@@ +switch (...) +{ +default: ...;@p +} + +@r_case@ +position p; +@@ +( +switch (...) +{ +case ...:;@p +} +| +switch (...) +{ +case ...:... +case ...:;@p +} +| +switch (...) +{ +case ...:... +case ...: +case ...:;@p +} +) + +@r1@ +statement S; +position p1; +position p != {r_default.p, r_case.p}; +identifier label; +@@ +( +label:; +| +S@p1;@p +) + +@script:python@ +p << r1.p; +p1 << r1.p1; +@@ +if p[0].line != p1[0].line_end: + cocci.include_match(False) + +@depends on patch@ +position r1.p; +@@ +-;@p + +@script:python depends on report@ +p << r1.p; +@@ +coccilib.report.print_report(p[0],"Unneeded semicolon") + +@depends on context@ +position r1.p; +@@ +*;@p + +@script:python depends on org@ +p << r1.p; +@@ +cocci.print_main("Unneeded semicolon",p) diff --git a/scripts/depmod.sh b/scripts/depmod.sh index 2ae481703141..122599b1c13b 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -2,16 +2,36 @@ # # A depmod wrapper used by the toplevel Makefile -if test $# -ne 2; then - echo "Usage: $0 /sbin/depmod <kernelrelease>" >&2 +if test $# -ne 3; then + echo "Usage: $0 /sbin/depmod <kernelrelease> <symbolprefix>" >&2 exit 1 fi DEPMOD=$1 KERNELRELEASE=$2 +SYMBOL_PREFIX=$3 if ! test -r System.map -a -x "$DEPMOD"; then exit 0 fi + +# older versions of depmod don't support -P <symbol-prefix> +# support was added in module-init-tools 3.13 +if test -n "$SYMBOL_PREFIX"; then + release=$("$DEPMOD" --version) + package=$(echo "$release" | cut -d' ' -f 1) + if test "$package" = "module-init-tools"; then + version=$(echo "$release" | cut -d' ' -f 2) + later=$(printf '%s\n' "$version" "3.13" | sort -V | tail -n 1) + if test "$later" != "$version"; then + # module-init-tools < 3.13, drop the symbol prefix + SYMBOL_PREFIX="" + fi + fi + if test -n "$SYMBOL_PREFIX"; then + SYMBOL_PREFIX="-P $SYMBOL_PREFIX" + fi +fi + # older versions of depmod require the version string to start with three # numbers, so we cheat with a symlink here depmod_hack_needed=true @@ -34,7 +54,7 @@ set -- -ae -F System.map if test -n "$INSTALL_MOD_PATH"; then set -- "$@" -b "$INSTALL_MOD_PATH" fi -"$DEPMOD" "$@" "$KERNELRELEASE" +"$DEPMOD" "$@" "$KERNELRELEASE" $SYMBOL_PREFIX ret=$? if $depmod_hack_needed; then diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 18d4ab55606b..ce4cc837b748 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -611,6 +611,10 @@ sub get_maintainers { $hash{$tvi} = $value_pd; } } + } elsif ($type eq 'K') { + if ($file =~ m/$value/x) { + $hash{$tvi} = 0; + } } } } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 3091794e9354..231b4759c714 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -11,6 +11,9 @@ else Kconfig := Kconfig endif +# We need this, in case the user has it in its environment +unexport CONFIG_ + xconfig: $(obj)/qconf $< $(Kconfig) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 4da3b4adfad2..e39fcd8143ea 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -36,6 +36,7 @@ enum input_mode { } input_mode = oldaskconfig; static int indent = 1; +static int tty_stdio; static int valid_stdin = 1; static int sync_kconfig; static int conf_cnt; @@ -108,6 +109,8 @@ static int conf_askvalue(struct symbol *sym, const char *def) case oldaskconfig: fflush(stdout); xfgets(line, 128, stdin); + if (!tty_stdio) + printf("\n"); return 1; default: break; @@ -495,6 +498,8 @@ int main(int ac, char **av) bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); + tty_stdio = isatty(0) && isatty(1) && isatty(2); + while ((opt = getopt_long(ac, av, "", long_opts, NULL)) != -1) { input_mode = (enum input_mode)opt; switch (opt) { @@ -621,7 +626,7 @@ int main(int ac, char **av) return 1; } } - valid_stdin = isatty(0) && isatty(1) && isatty(2); + valid_stdin = tty_stdio; } switch (input_mode) { diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index 290ce41f8ba4..d6626521f9b9 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -13,7 +13,7 @@ struct expr *expr_alloc_symbol(struct symbol *sym) { - struct expr *e = calloc(1, sizeof(*e)); + struct expr *e = xcalloc(1, sizeof(*e)); e->type = E_SYMBOL; e->left.sym = sym; return e; @@ -21,7 +21,7 @@ struct expr *expr_alloc_symbol(struct symbol *sym) struct expr *expr_alloc_one(enum expr_type type, struct expr *ce) { - struct expr *e = calloc(1, sizeof(*e)); + struct expr *e = xcalloc(1, sizeof(*e)); e->type = type; e->left.expr = ce; return e; @@ -29,7 +29,7 @@ struct expr *expr_alloc_one(enum expr_type type, struct expr *ce) struct expr *expr_alloc_two(enum expr_type type, struct expr *e1, struct expr *e2) { - struct expr *e = calloc(1, sizeof(*e)); + struct expr *e = xcalloc(1, sizeof(*e)); e->type = type; e->left.expr = e1; e->right.expr = e2; @@ -38,7 +38,7 @@ struct expr *expr_alloc_two(enum expr_type type, struct expr *e1, struct expr *e struct expr *expr_alloc_comp(enum expr_type type, struct symbol *s1, struct symbol *s2) { - struct expr *e = calloc(1, sizeof(*e)); + struct expr *e = xcalloc(1, sizeof(*e)); e->type = type; e->left.sym = s1; e->right.sym = s2; @@ -66,7 +66,7 @@ struct expr *expr_copy(const struct expr *org) if (!org) return NULL; - e = malloc(sizeof(*org)); + e = xmalloc(sizeof(*org)); memcpy(e, org, sizeof(*org)); switch (org->type) { case E_SYMBOL: diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index adc230638c5b..f2bee70e26f4 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -10,6 +10,7 @@ # include <config.h> #endif +#include <stdlib.h> #include "lkc.h" #include "images.c" @@ -22,7 +23,6 @@ #include <string.h> #include <unistd.h> #include <time.h> -#include <stdlib.h> //#define DEBUG diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index c18f2bd9c095..f8aee5fc6d5e 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -39,6 +39,12 @@ extern "C" { #ifndef CONFIG_ #define CONFIG_ "CONFIG_" #endif +static inline const char *CONFIG_prefix(void) +{ + return getenv( "CONFIG_" ) ?: CONFIG_; +} +#undef CONFIG_ +#define CONFIG_ CONFIG_prefix() #define TF_COMMAND 0x0001 #define TF_PARAM 0x0002 @@ -116,6 +122,8 @@ void menu_set_type(int type); /* util.c */ struct file *file_lookup(const char *name); int file_write_dep(const char *name); +void *xmalloc(size_t size); +void *xcalloc(size_t nmemb, size_t size); struct gstr { size_t len; diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh index c8e8a7154753..80788137c670 100644 --- a/scripts/kconfig/lxdialog/check-lxdialog.sh +++ b/scripts/kconfig/lxdialog/check-lxdialog.sh @@ -21,6 +21,7 @@ ccflags() { if [ -f /usr/include/ncursesw/curses.h ]; then echo '-I/usr/include/ncursesw -DCURSES_LOC="<ncursesw/curses.h>"' + echo ' -DNCURSES_WIDECHAR=1' elif [ -f /usr/include/ncurses/ncurses.h ]; then echo '-I/usr/include/ncurses -DCURSES_LOC="<ncurses.h>"' elif [ -f /usr/include/ncurses/curses.h ]; then diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h index ee17a5264d5b..307022a8beef 100644 --- a/scripts/kconfig/lxdialog/dialog.h +++ b/scripts/kconfig/lxdialog/dialog.h @@ -221,7 +221,6 @@ int dialog_menu(const char *title, const char *prompt, const void *selected, int *s_scroll); int dialog_checklist(const char *title, const char *prompt, int height, int width, int list_height); -extern char dialog_input_result[]; int dialog_inputbox(const char *title, const char *prompt, int height, int width, const char *init); diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index dd8e587c50e2..21404a04d7c3 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -45,7 +45,8 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width const char *init) { int i, x, y, box_y, box_x, box_width; - int input_x = 0, scroll = 0, key = 0, button = -1; + int input_x = 0, key = 0, button = -1; + int show_x, len, pos; char *instr = dialog_input_result; WINDOW *dialog; @@ -97,14 +98,17 @@ do_resize: wmove(dialog, box_y, box_x); wattrset(dialog, dlg.inputbox.atr); - input_x = strlen(instr); + len = strlen(instr); + pos = len; - if (input_x >= box_width) { - scroll = input_x - box_width + 1; + if (len >= box_width) { + show_x = len - box_width + 1; input_x = box_width - 1; for (i = 0; i < box_width - 1; i++) - waddch(dialog, instr[scroll + i]); + waddch(dialog, instr[show_x + i]); } else { + show_x = 0; + input_x = len; waddstr(dialog, instr); } @@ -121,45 +125,104 @@ do_resize: case KEY_UP: case KEY_DOWN: break; - case KEY_LEFT: - continue; - case KEY_RIGHT: - continue; case KEY_BACKSPACE: case 127: - if (input_x || scroll) { + if (pos) { wattrset(dialog, dlg.inputbox.atr); - if (!input_x) { - scroll = scroll < box_width - 1 ? 0 : scroll - (box_width - 1); - wmove(dialog, box_y, box_x); - for (i = 0; i < box_width; i++) - waddch(dialog, - instr[scroll + input_x + i] ? - instr[scroll + input_x + i] : ' '); - input_x = strlen(instr) - scroll; + if (input_x == 0) { + show_x--; } else input_x--; - instr[scroll + input_x] = '\0'; - mvwaddch(dialog, box_y, input_x + box_x, ' '); + + if (pos < len) { + for (i = pos - 1; i < len; i++) { + instr[i] = instr[i+1]; + } + } + + pos--; + len--; + instr[len] = '\0'; + wmove(dialog, box_y, box_x); + for (i = 0; i < box_width; i++) { + if (!instr[show_x + i]) { + waddch(dialog, ' '); + break; + } + waddch(dialog, instr[show_x + i]); + } wmove(dialog, box_y, input_x + box_x); wrefresh(dialog); } continue; + case KEY_LEFT: + if (pos > 0) { + if (input_x > 0) { + wmove(dialog, box_y, --input_x + box_x); + } else if (input_x == 0) { + show_x--; + wmove(dialog, box_y, box_x); + for (i = 0; i < box_width; i++) { + if (!instr[show_x + i]) { + waddch(dialog, ' '); + break; + } + waddch(dialog, instr[show_x + i]); + } + wmove(dialog, box_y, box_x); + } + pos--; + } + continue; + case KEY_RIGHT: + if (pos < len) { + if (input_x < box_width - 1) { + wmove(dialog, box_y, ++input_x + box_x); + } else if (input_x == box_width - 1) { + show_x++; + wmove(dialog, box_y, box_x); + for (i = 0; i < box_width; i++) { + if (!instr[show_x + i]) { + waddch(dialog, ' '); + break; + } + waddch(dialog, instr[show_x + i]); + } + wmove(dialog, box_y, input_x + box_x); + } + pos++; + } + continue; default: if (key < 0x100 && isprint(key)) { - if (scroll + input_x < MAX_LEN) { + if (len < MAX_LEN) { wattrset(dialog, dlg.inputbox.atr); - instr[scroll + input_x] = key; - instr[scroll + input_x + 1] = '\0'; + if (pos < len) { + for (i = len; i > pos; i--) + instr[i] = instr[i-1]; + instr[pos] = key; + } else { + instr[len] = key; + } + pos++; + len++; + instr[len] = '\0'; + if (input_x == box_width - 1) { - scroll++; - wmove(dialog, box_y, box_x); - for (i = 0; i < box_width - 1; i++) - waddch(dialog, instr [scroll + i]); + show_x++; } else { - wmove(dialog, box_y, input_x++ + box_x); - waddch(dialog, key); + input_x++; + } + + wmove(dialog, box_y, box_x); + for (i = 0; i < box_width; i++) { + if (!instr[show_x + i]) { + waddch(dialog, ' '); + break; + } + waddch(dialog, instr[show_x + i]); } + wmove(dialog, box_y, input_x + box_x); wrefresh(dialog); } else flash(); /* Alarm user about overflow */ diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c index 1d604738fa13..48d382e7e374 100644 --- a/scripts/kconfig/lxdialog/menubox.c +++ b/scripts/kconfig/lxdialog/menubox.c @@ -26,7 +26,7 @@ * * *) A bugfix for the Page-Down problem * - * *) Formerly when I used Page Down and Page Up, the cursor would be set + * *) Formerly when I used Page Down and Page Up, the cursor would be set * to the first position in the menu box. Now lxdialog is a bit * smarter and works more like other menu systems (just have a look at * it). @@ -154,12 +154,14 @@ static void print_arrows(WINDOW * win, int item_no, int scroll, int y, int x, */ static void print_buttons(WINDOW * win, int height, int width, int selected) { - int x = width / 2 - 16; + int x = width / 2 - 28; int y = height - 2; print_button(win, gettext("Select"), y, x, selected == 0); print_button(win, gettext(" Exit "), y, x + 12, selected == 1); print_button(win, gettext(" Help "), y, x + 24, selected == 2); + print_button(win, gettext(" Save "), y, x + 36, selected == 3); + print_button(win, gettext(" Load "), y, x + 48, selected == 4); wmove(win, y, x + 1 + 12 * selected); wrefresh(win); @@ -372,7 +374,7 @@ do_resize: case TAB: case KEY_RIGHT: button = ((key == KEY_LEFT ? --button : ++button) < 0) - ? 2 : (button > 2 ? 0 : button); + ? 4 : (button > 4 ? 0 : button); print_buttons(dialog, height, width, button); wrefresh(menu); @@ -399,17 +401,17 @@ do_resize: return 2; case 's': case 'y': - return 3; + return 5; case 'n': - return 4; + return 6; case 'm': - return 5; + return 7; case ' ': - return 6; + return 8; case '/': - return 7; + return 9; case 'z': - return 8; + return 10; case '\n': return button; } diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 53975cf87608..566288a76370 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -280,6 +280,7 @@ static struct menu *current_menu; static int child_count; static int single_menu_mode; static int show_all_options; +static int save_and_exit; static void conf(struct menu *menu, struct menu *active_menu); static void conf_choice(struct menu *menu); @@ -348,15 +349,19 @@ static void search_conf(void) { struct symbol **sym_arr; struct gstr res; + struct gstr title; char *dialog_input; int dres, vscroll = 0, hscroll = 0; bool again; + title = str_new(); + str_printf( &title, _("Enter %s (sub)string to search for " + "(with or without \"%s\")"), CONFIG_, CONFIG_); + again: dialog_clear(); dres = dialog_inputbox(_("Search Configuration Parameter"), - _("Enter " CONFIG_ " (sub)string to search for " - "(with or without \"" CONFIG_ "\")"), + str_get(&title), 10, 75, ""); switch (dres) { case 0: @@ -365,6 +370,7 @@ again: show_helptext(_("Search Configuration"), search_help); goto again; default: + str_free(&title); return; } @@ -398,6 +404,7 @@ again: str_free(&res); } while (again); free(sym_arr); + str_free(&title); } static void build_conf(struct menu *menu) @@ -592,14 +599,6 @@ static void conf(struct menu *menu, struct menu *active_menu) build_conf(menu); if (!child_count) break; - if (menu == &rootmenu) { - item_make("--- "); - item_set_tag(':'); - item_make(_(" Load an Alternate Configuration File")); - item_set_tag('L'); - item_make(_(" Save an Alternate Configuration File")); - item_set_tag('S'); - } dialog_clear(); res = dialog_menu(prompt ? _(prompt) : _("Main Menu"), _(menu_instructions), @@ -636,12 +635,6 @@ static void conf(struct menu *menu, struct menu *active_menu) case 's': conf_string(submenu); break; - case 'L': - conf_load(); - break; - case 'S': - conf_save(); - break; } break; case 2: @@ -651,6 +644,12 @@ static void conf(struct menu *menu, struct menu *active_menu) show_helptext(_("README"), _(mconf_readme)); break; case 3: + conf_save(); + break; + case 4: + conf_load(); + break; + case 5: if (item_is_tag('t')) { if (sym_set_tristate_value(sym, yes)) break; @@ -658,24 +657,24 @@ static void conf(struct menu *menu, struct menu *active_menu) show_textbox(NULL, setmod_text, 6, 74); } break; - case 4: + case 6: if (item_is_tag('t')) sym_set_tristate_value(sym, no); break; - case 5: + case 7: if (item_is_tag('t')) sym_set_tristate_value(sym, mod); break; - case 6: + case 8: if (item_is_tag('t')) sym_toggle_tristate_value(sym); else if (item_is_tag('m')) conf(submenu, NULL); break; - case 7: + case 9: search_conf(); break; - case 8: + case 10: show_all_options = !show_all_options; break; } @@ -702,6 +701,17 @@ static void show_helptext(const char *title, const char *text) show_textbox(title, text, 0, 0); } +static void conf_message_callback(const char *fmt, va_list ap) +{ + char buf[PATH_MAX+1]; + + vsnprintf(buf, sizeof(buf), fmt, ap); + if (save_and_exit) + printf("%s", buf); + else + show_textbox(NULL, buf, 6, 60); +} + static void show_help(struct menu *menu) { struct gstr help = str_new(); @@ -870,6 +880,7 @@ static int handle_exit(void) { int res; + save_and_exit = 1; dialog_clear(); if (conf_get_changed()) res = dialog_yesno(NULL, @@ -941,6 +952,7 @@ int main(int ac, char **av) } set_config_filename(conf_get_configname()); + conf_set_message_callback(conf_message_callback); do { conf(&rootmenu, NULL); res = handle_exit(); diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index e98a05c8e508..f3bffa309333 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -48,7 +48,7 @@ void menu_add_entry(struct symbol *sym) { struct menu *menu; - menu = malloc(sizeof(*menu)); + menu = xmalloc(sizeof(*menu)); memset(menu, 0, sizeof(*menu)); menu->sym = sym; menu->parent = current_menu; @@ -531,7 +531,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, location = menu; } if (head && location) { - jump = malloc(sizeof(struct jump_key)); + jump = xmalloc(sizeof(struct jump_key)); if (menu_is_visible(prop->menu)) { /* diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 974d5cb7e30a..05274fccb88e 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -32,11 +32,13 @@ usage() { echo " -m only merge the fragments, do not execute the make command" echo " -n use allnoconfig instead of alldefconfig" echo " -r list redundant entries when merging fragments" + echo " -O dir to put generated output files" } MAKE=true ALLTARGET=alldefconfig WARNREDUN=false +OUTPUT=. while true; do case $1 in @@ -59,6 +61,16 @@ while true; do shift continue ;; + "-O") + if [ -d $2 ];then + OUTPUT=$(echo $2 | sed 's/\/*$//') + else + echo "output directory $2 does not exist" 1>&2 + exit 1 + fi + shift 2 + continue + ;; *) break ;; @@ -100,9 +112,9 @@ for MERGE_FILE in $MERGE_LIST ; do done if [ "$MAKE" = "false" ]; then - cp $TMP_FILE .config + cp $TMP_FILE $OUTPUT/.config echo "#" - echo "# merged configuration written to .config (needs make)" + echo "# merged configuration written to $OUTPUT/.config (needs make)" echo "#" clean_up exit @@ -111,14 +123,14 @@ fi # Use the merged file as the starting point for: # alldefconfig: Fills in any missing symbols with Kconfig default # allnoconfig: Fills in any missing symbols with # CONFIG_* is not set -make KCONFIG_ALLCONFIG=$TMP_FILE $ALLTARGET +make KCONFIG_ALLCONFIG=$TMP_FILE O=$OUTPUT $ALLTARGET # Check all specified config values took (might have missed-dependency issues) for CFG in $(sed -n "$SED_CONFIG_EXP" $TMP_FILE); do REQUESTED_VAL=$(grep -w -e "$CFG" $TMP_FILE) - ACTUAL_VAL=$(grep -w -e "$CFG" .config) + ACTUAL_VAL=$(grep -w -e "$CFG" $OUTPUT/.config) if [ "x$REQUESTED_VAL" != "x$ACTUAL_VAL" ] ; then echo "Value requested for $CFG not in final .config" echo "Requested value: $REQUESTED_VAL" diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index 87d4b15da951..dbf31edd22b2 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -7,215 +7,208 @@ */ #define _GNU_SOURCE #include <string.h> +#include <stdlib.h> #include "lkc.h" #include "nconf.h" #include <ctype.h> -static const char nconf_readme[] = N_( -"Overview\n" -"--------\n" -"This interface let you select features and parameters for the build.\n" -"Features can either be built-in, modularized, or ignored. Parameters\n" -"must be entered in as decimal or hexadecimal numbers or text.\n" +static const char nconf_global_help[] = N_( +"Help windows\n" +"------------\n" +"o Global help: Unless in a data entry window, pressing <F1> will give \n" +" you the global help window, which you are just reading.\n" "\n" -"Menu items beginning with following braces represent features that\n" -" [ ] can be built in or removed\n" -" < > can be built in, modularized or removed\n" -" { } can be built in or modularized (selected by other feature)\n" -" - - are selected by other feature,\n" -" XXX cannot be selected. Use Symbol Info to find out why,\n" -"while *, M or whitespace inside braces means to build in, build as\n" -"a module or to exclude the feature respectively.\n" +"o A short version of the global help is available by pressing <F3>.\n" "\n" -"To change any of these features, highlight it with the cursor\n" -"keys and press <Y> to build it in, <M> to make it a module or\n" -"<N> to removed it. You may also press the <Space Bar> to cycle\n" -"through the available options (ie. Y->N->M->Y).\n" +"o Local help: To get help related to the current menu entry, use any\n" +" of <?> <h>, or if in a data entry window then press <F1>.\n" "\n" -"Some additional keyboard hints:\n" "\n" -"Menus\n" -"----------\n" -"o Use the Up/Down arrow keys (cursor keys) to highlight the item\n" -" you wish to change use <Enter> or <Space>. Goto submenu by \n" -" pressing <Enter> of <right-arrow>. Use <Esc> or <left-arrow> to go back.\n" -" Submenus are designated by \"--->\".\n" -"\n" -" Searching: pressing '/' triggers interactive search mode.\n" -" nconfig performs a case insensitive search for the string\n" -" in the menu prompts (no regex support).\n" -" Pressing the up/down keys highlights the previous/next\n" -" matching item. Backspace removes one character from the\n" -" match string. Pressing either '/' again or ESC exits\n" -" search mode. All other keys behave normally.\n" +"Menu entries\n" +"------------\n" +"This interface lets you select features and parameters for the kernel\n" +"build. Kernel features can either be built-in, modularized, or removed.\n" +"Parameters must be entered as text or decimal or hexadecimal numbers.\n" "\n" -" You may also use the <PAGE UP> and <PAGE DOWN> keys to scroll\n" -" unseen options into view.\n" +"Menu entries beginning with following braces represent features that\n" +" [ ] can be built in or removed\n" +" < > can be built in, modularized or removed\n" +" { } can be built in or modularized, are selected by another feature\n" +" - - are selected by another feature\n" +" XXX cannot be selected. Symbol Info <F2> tells you why.\n" +"*, M or whitespace inside braces means to build in, build as a module\n" +"or to exclude the feature respectively.\n" "\n" -"o To exit a menu use the just press <ESC> <F5> <F8> or <left-arrow>.\n" +"To change any of these features, highlight it with the movement keys\n" +"listed below and press <y> to build it in, <m> to make it a module or\n" +"<n> to remove it. You may press the <Space> key to cycle through the\n" +"available options.\n" "\n" -"o To get help with an item, press <F1>\n" -" Shortcut: Press <h> or <?>.\n" +"A trailing \"--->\" designates a submenu.\n" "\n" "\n" -"Radiolists (Choice lists)\n" -"-----------\n" -"o Use the cursor keys to select the option you wish to set and press\n" -" <S> or the <SPACE BAR>.\n" +"Menu navigation keys\n" +"----------------------------------------------------------------------\n" +"Linewise up <Up>\n" +"Linewise down <Down>\n" +"Pagewise up <Page Up>\n" +"Pagewise down <Page Down>\n" +"First entry <Home>\n" +"Last entry <End>\n" +"Enter a submenu <Right> <Enter>\n" +"Go back to parent menu <Left> <Esc> <F5>\n" +"Close a help window <Enter> <Esc> <F5>\n" +"Close entry window, apply <Enter>\n" +"Close entry window, forget <Esc> <F5>\n" +"Start incremental, case-insensitive search for STRING in menu entries,\n" +" no regex support, STRING is displayed in upper left corner\n" +" </>STRING\n" +" Remove last character <Backspace>\n" +" Jump to next hit <Down>\n" +" Jump to previous hit <Up>\n" +"Exit menu search mode </> <Esc>\n" +"Search for configuration variables with or without leading CONFIG_\n" +" <F8>RegExpr<Enter>\n" +"Verbose search help <F8><F1>\n" +"----------------------------------------------------------------------\n" "\n" -" Shortcut: Press the first letter of the option you wish to set then\n" -" press <S> or <SPACE BAR>.\n" +"Unless in a data entry window, key <1> may be used instead of <F1>,\n" +"<2> instead of <F2>, etc.\n" "\n" -"o To see available help for the item, press <F1>\n" -" Shortcut: Press <H> or <?>.\n" "\n" +"Radiolist (Choice list)\n" +"-----------------------\n" +"Use the movement keys listed above to select the option you wish to set\n" +"and press <Space>.\n" "\n" -"Data Entry\n" -"-----------\n" -"o Enter the requested information and press <ENTER>\n" -" If you are entering hexadecimal values, it is not necessary to\n" -" add the '0x' prefix to the entry.\n" "\n" -"o For help, press <F1>.\n" +"Data entry\n" +"----------\n" +"Enter the requested information and press <Enter>. Hexadecimal values\n" +"may be entered without the \"0x\" prefix.\n" "\n" "\n" -"Text Box (Help Window)\n" -"--------\n" -"o Use the cursor keys to scroll up/down/left/right. The VI editor\n" -" keys h,j,k,l function here as do <u>, <d> and <SPACE BAR> for\n" -" those who are familiar with less and lynx.\n" +"Text Box (Help Window)\n" +"----------------------\n" +"Use movement keys as listed in table above.\n" "\n" -"o Press <Enter>, <F1>, <F5>, <F9>, <q> or <Esc> to exit.\n" +"Press any of <Enter> <Esc> <q> <F5> <F9> to exit.\n" "\n" "\n" -"Alternate Configuration Files\n" +"Alternate configuration files\n" "-----------------------------\n" -"nconfig supports the use of alternate configuration files for\n" -"those who, for various reasons, find it necessary to switch\n" -"between different configurations.\n" +"nconfig supports switching between different configurations.\n" +"Press <F6> to save your current configuration. Press <F7> and enter\n" +"a file name to load a previously saved configuration.\n" "\n" -"At the end of the main menu you will find two options. One is\n" -"for saving the current configuration to a file of your choosing.\n" -"The other option is for loading a previously saved alternate\n" -"configuration.\n" "\n" -"Even if you don't use alternate configuration files, but you\n" -"find during a nconfig session that you have completely messed\n" -"up your settings, you may use the \"Load Alternate...\" option to\n" -"restore your previously saved settings from \".config\" without\n" -"restarting nconfig.\n" +"Terminal configuration\n" +"----------------------\n" +"If you use nconfig in a xterm window, make sure your TERM environment\n" +"variable specifies a terminal configuration which supports at least\n" +"16 colors. Otherwise nconfig will look rather bad.\n" "\n" -"Other information\n" -"-----------------\n" -"If you use nconfig in an XTERM window make sure you have your\n" -"$TERM variable set to point to a xterm definition which supports color.\n" -"Otherwise, nconfig will look rather bad. nconfig will not\n" -"display correctly in a RXVT window because rxvt displays only one\n" -"intensity of color, bright.\n" +"If the \"stty size\" command reports the current terminalsize correctly,\n" +"nconfig will adapt to sizes larger than the traditional 80x25 \"standard\"\n" +"and display longer menus properly.\n" "\n" -"nconfig will display larger menus on screens or xterms which are\n" -"set to display more than the standard 25 row by 80 column geometry.\n" -"In order for this to work, the \"stty size\" command must be able to\n" -"display the screen's current row and column geometry. I STRONGLY\n" -"RECOMMEND that you make sure you do NOT have the shell variables\n" -"LINES and COLUMNS exported into your environment. Some distributions\n" -"export those variables via /etc/profile. Some ncurses programs can\n" -"become confused when those variables (LINES & COLUMNS) don't reflect\n" -"the true screen size.\n" "\n" -"Optional personality available\n" -"------------------------------\n" -"If you prefer to have all of the options listed in a single menu, rather\n" -"than the default multimenu hierarchy, run the nconfig with NCONFIG_MODE\n" -"environment variable set to single_menu. Example:\n" +"Single menu mode\n" +"----------------\n" +"If you prefer to have all of the menu entries listed in a single menu,\n" +"rather than the default multimenu hierarchy, run nconfig with\n" +"NCONFIG_MODE environment variable set to single_menu. Example:\n" "\n" "make NCONFIG_MODE=single_menu nconfig\n" "\n" -"<Enter> will then unroll the appropriate category, or enfold it if it\n" -"is already unrolled.\n" +"<Enter> will then unfold the appropriate category, or fold it if it\n" +"is already unfolded. Folded menu entries will be designated by a\n" +"leading \"++>\" and unfolded entries by a leading \"-->\".\n" "\n" -"Note that this mode can eventually be a little more CPU expensive\n" -"(especially with a larger number of unrolled categories) than the\n" -"default mode.\n" +"Note that this mode can eventually be a little more CPU expensive than\n" +"the default mode, especially with a larger number of unfolded submenus.\n" "\n"), menu_no_f_instructions[] = N_( -" You do not have function keys support. Please follow the\n" -" following instructions:\n" -" Arrow keys navigate the menu.\n" -" <Enter> or <right-arrow> selects submenus --->.\n" -" Capital Letters are hotkeys.\n" -" Pressing <Y> includes, <N> excludes, <M> modularizes features.\n" -" Pressing SpaceBar toggles between the above options.\n" -" Press <Esc> or <left-arrow> to go back one menu,\n" -" <?> or <h> for Help, </> for Search.\n" -" <1> is interchangeable with <F1>, <2> with <F2>, etc.\n" -" Legend: [*] built-in [ ] excluded <M> module < > module capable.\n" -" <Esc> always leaves the current window.\n"), +"Legend: [*] built-in [ ] excluded <M> module < > module capable.\n" +"Submenus are designated by a trailing \"--->\".\n" +"\n" +"Use the following keys to navigate the menus:\n" +"Move up or down with <Up> and <Down>.\n" +"Enter a submenu with <Enter> or <Right>.\n" +"Exit a submenu to its parent menu with <Esc> or <Left>.\n" +"Pressing <y> includes, <n> excludes, <m> modularizes features.\n" +"Pressing <Space> cycles through the available options.\n" +"To search for menu entries press </>.\n" +"<Esc> always leaves the current window.\n" +"\n" +"You do not have function keys support.\n" +"Press <1> instead of <F1>, <2> instead of <F2>, etc.\n" +"For verbose global help use key <1>.\n" +"For help related to the current menu entry press <?> or <h>.\n"), menu_instructions[] = N_( -" Arrow keys navigate the menu.\n" -" <Enter> or <right-arrow> selects submenus --->.\n" -" Capital Letters are hotkeys.\n" -" Pressing <Y> includes, <N> excludes, <M> modularizes features.\n" -" Pressing SpaceBar toggles between the above options\n" -" Press <Esc>, <F5> or <left-arrow> to go back one menu,\n" -" <?>, <F1> or <h> for Help, </> for Search.\n" -" <1> is interchangeable with <F1>, <2> with <F2>, etc.\n" -" Legend: [*] built-in [ ] excluded <M> module < > module capable.\n" -" <Esc> always leaves the current window\n"), +"Legend: [*] built-in [ ] excluded <M> module < > module capable.\n" +"Submenus are designated by a trailing \"--->\".\n" +"\n" +"Use the following keys to navigate the menus:\n" +"Move up or down with <Up> or <Down>.\n" +"Enter a submenu with <Enter> or <Right>.\n" +"Exit a submenu to its parent menu with <Esc> or <Left>.\n" +"Pressing <y> includes, <n> excludes, <m> modularizes features.\n" +"Pressing <Space> cycles through the available options.\n" +"To search for menu entries press </>.\n" +"<Esc> always leaves the current window.\n" +"\n" +"Pressing <1> may be used instead of <F1>, <2> instead of <F2>, etc.\n" +"For verbose global help press <F1>.\n" +"For help related to the current menu entry press <?> or <h>.\n"), radiolist_instructions[] = N_( -" Use the arrow keys to navigate this window or\n" -" press the hotkey of the item you wish to select\n" -" followed by the <SPACE BAR>.\n" -" Press <?>, <F1> or <h> for additional information about this option.\n"), +"Press <Up>, <Down>, <Home> or <End> to navigate a radiolist, select\n" +"with <Space>.\n" +"For help related to the current entry press <?> or <h>.\n" +"For global help press <F1>.\n"), inputbox_instructions_int[] = N_( "Please enter a decimal value.\n" "Fractions will not be accepted.\n" -"Press <RETURN> to accept, <ESC> to cancel."), +"Press <Enter> to apply, <Esc> to cancel."), inputbox_instructions_hex[] = N_( "Please enter a hexadecimal value.\n" -"Press <RETURN> to accept, <ESC> to cancel."), +"Press <Enter> to apply, <Esc> to cancel."), inputbox_instructions_string[] = N_( "Please enter a string value.\n" -"Press <RETURN> to accept, <ESC> to cancel."), +"Press <Enter> to apply, <Esc> to cancel."), setmod_text[] = N_( -"This feature depends on another which\n" -"has been configured as a module.\n" -"As a result, this feature will be built as a module."), +"This feature depends on another feature which has been configured as a\n" +"module. As a result, the current feature will be built as a module too."), load_config_text[] = N_( "Enter the name of the configuration file you wish to load.\n" -"Accept the name shown to restore the configuration you\n" -"last retrieved. Leave blank to abort."), +"Accept the name shown to restore the configuration you last\n" +"retrieved. Leave empty to abort."), load_config_help[] = N_( -"\n" "For various reasons, one may wish to keep several different\n" "configurations available on a single machine.\n" "\n" "If you have saved a previous configuration in a file other than the\n" -"default one, entering its name here will allow you to modify that\n" -"configuration.\n" +"default one, entering its name here will allow you to load and modify\n" +"that configuration.\n" "\n" -"If you are uncertain, then you have probably never used alternate\n" -"configuration files. You should therefor leave this blank to abort.\n"), +"Leave empty to abort.\n"), save_config_text[] = N_( "Enter a filename to which this configuration should be saved\n" -"as an alternate. Leave blank to abort."), +"as an alternate. Leave empty to abort."), save_config_help[] = N_( -"\n" -"For various reasons, one may wish to keep different configurations\n" -"available on a single machine.\n" +"For various reasons, one may wish to keep several different\n" +"configurations available on a single machine.\n" "\n" "Entering a file name here will allow you to later retrieve, modify\n" "and use the current configuration as an alternate to whatever\n" "configuration options you have selected at that time.\n" "\n" -"If you are uncertain what all this means then you should probably\n" -"leave this blank.\n"), +"Leave empty to abort.\n"), search_help[] = N_( -"\n" -"Search for symbols and display their relations. Regular expressions\n" -"are allowed.\n" -"Example: search for \"^FOO\"\n" +"Search for symbols (configuration variable names CONFIG_*) and display\n" +"their relations. Regular expressions are supported.\n" +"Example: Search for \"^FOO\".\n" "Result:\n" "-----------------------------------------------------------------\n" "Symbol: FOO [ = m]\n" @@ -229,26 +222,26 @@ search_help[] = N_( "Selects: LIBCRC32\n" "Selected by: BAR\n" "-----------------------------------------------------------------\n" -"o The line 'Prompt:' shows the text used in the menu structure for\n" -" this symbol\n" -"o The 'Defined at' line tell at what file / line number the symbol\n" -" is defined\n" -"o The 'Depends on:' line tell what symbols needs to be defined for\n" -" this symbol to be visible in the menu (selectable)\n" -"o The 'Location:' lines tell where in the menu structure this symbol\n" -" is located\n" -" A location followed by a [ = y] indicate that this is a selectable\n" -" menu item - and current value is displayed inside brackets.\n" -"o The 'Selects:' line tell what symbol will be automatically\n" -" selected if this symbol is selected (y or m)\n" -"o The 'Selected by' line tell what symbol has selected this symbol\n" +"o The line 'Prompt:' shows the text displayed for this symbol in\n" +" the menu hierarchy.\n" +"o The 'Defined at' line tells at what file / line number the symbol is\n" +" defined.\n" +"o The 'Depends on:' line lists symbols that need to be defined for\n" +" this symbol to be visible and selectable in the menu.\n" +"o The 'Location:' lines tell, where in the menu structure this symbol\n" +" is located. A location followed by a [ = y] indicates that this is\n" +" a selectable menu item, and the current value is displayed inside\n" +" brackets.\n" +"o The 'Selects:' line tells, what symbol will be automatically selected\n" +" if this symbol is selected (y or m).\n" +"o The 'Selected by' line tells what symbol has selected this symbol.\n" "\n" "Only relevant lines are shown.\n" "\n\n" "Search examples:\n" -"Examples: USB => find all symbols containing USB\n" -" ^USB => find all symbols starting with USB\n" -" USB$ => find all symbols ending with USB\n" +"USB => find all symbols containing USB\n" +"^USB => find all symbols starting with USB\n" +"USB$ => find all symbols ending with USB\n" "\n"); struct mitem { @@ -319,19 +312,19 @@ struct function_keys function_keys[] = { }, { .key_str = "F2", - .func = "Sym Info", + .func = "SymInfo", .key = F_SYMBOL, .handler = handle_f2, }, { .key_str = "F3", - .func = "Insts", + .func = "Help 2", .key = F_INSTS, .handler = handle_f3, }, { .key_str = "F4", - .func = "Config", + .func = "ShowAll", .key = F_CONF, .handler = handle_f4, }, @@ -355,7 +348,7 @@ struct function_keys function_keys[] = { }, { .key_str = "F8", - .func = "Sym Search", + .func = "SymSearch", .key = F_SEARCH, .handler = handle_f8, }, @@ -392,7 +385,7 @@ static void print_function_line(void) static void handle_f1(int *key, struct menu *current_item) { show_scroll_win(main_window, - _("README"), _(nconf_readme)); + _("Global help"), _(nconf_global_help)); return; } @@ -407,7 +400,7 @@ static void handle_f2(int *key, struct menu *current_item) static void handle_f3(int *key, struct menu *current_item) { show_scroll_win(main_window, - _("Instructions"), + _("Short help"), _(current_instructions)); return; } @@ -696,13 +689,18 @@ static void search_conf(void) { struct symbol **sym_arr; struct gstr res; + struct gstr title; char *dialog_input; int dres; + + title = str_new(); + str_printf( &title, _("Enter %s (sub)string to search for " + "(with or without \"%s\")"), CONFIG_, CONFIG_); + again: dres = dialog_inputbox(main_window, _("Search Configuration Parameter"), - _("Enter " CONFIG_ " (sub)string to search for " - "(with or without \"" CONFIG_ "\")"), + str_get(&title), "", &dialog_input_result, &dialog_input_result_len); switch (dres) { case 0: @@ -712,6 +710,7 @@ again: _("Search Configuration"), search_help); goto again; default: + str_free(&title); return; } @@ -726,6 +725,7 @@ again: show_scroll_win(main_window, _("Search Results"), str_get(&res)); str_free(&res); + str_free(&title); } diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 379003c7a2b4..9f8c44ecc703 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -48,7 +48,7 @@ static void set_normal_colors(void) init_pair(INPUT_FIELD, -1, -1); init_pair(FUNCTION_HIGHLIGHT, -1, -1); - init_pair(FUNCTION_TEXT, COLOR_BLUE, -1); + init_pair(FUNCTION_TEXT, COLOR_YELLOW, -1); } /* available attributes: diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index df274febb3e5..1500c38f0cca 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -6,6 +6,7 @@ #include <qglobal.h> #if QT_VERSION < 0x040000 +#include <stddef.h> #include <qmainwindow.h> #include <qvbox.h> #include <qvaluelist.h> diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 22a3c400fc41..ecc5aa5f865d 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -656,11 +656,11 @@ bool sym_set_string_value(struct symbol *sym, const char *newval) size = strlen(newval) + 1; if (sym->type == S_HEX && (newval[0] != '0' || (newval[1] != 'x' && newval[1] != 'X'))) { size += 2; - sym->def[S_DEF_USER].val = val = malloc(size); + sym->def[S_DEF_USER].val = val = xmalloc(size); *val++ = '0'; *val++ = 'x'; } else if (!oldval || strcmp(oldval, newval)) - sym->def[S_DEF_USER].val = val = malloc(size); + sym->def[S_DEF_USER].val = val = xmalloc(size); else return true; @@ -812,7 +812,7 @@ struct symbol *sym_lookup(const char *name, int flags) hash = 0; } - symbol = malloc(sizeof(*symbol)); + symbol = xmalloc(sizeof(*symbol)); memset(symbol, 0, sizeof(*symbol)); symbol->name = new_name; symbol->type = S_UNKNOWN; @@ -863,7 +863,7 @@ const char *sym_expand_string_value(const char *in) size_t reslen; reslen = strlen(in) + 1; - res = malloc(reslen); + res = xmalloc(reslen); res[0] = '\0'; while ((src = strchr(in, '$'))) { @@ -921,7 +921,7 @@ const char *sym_escape_string_value(const char *in) p++; } - res = malloc(reslen); + res = xmalloc(reslen); res[0] = '\0'; strcat(res, "\""); @@ -1228,7 +1228,7 @@ struct property *prop_alloc(enum prop_type type, struct symbol *sym) struct property *prop; struct property **propp; - prop = malloc(sizeof(*prop)); + prop = xmalloc(sizeof(*prop)); memset(prop, 0, sizeof(*prop)); prop->type = type; prop->sym = sym; diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c index d0b8b2318e48..6e7fbf196809 100644 --- a/scripts/kconfig/util.c +++ b/scripts/kconfig/util.c @@ -23,7 +23,7 @@ struct file *file_lookup(const char *name) } } - file = malloc(sizeof(*file)); + file = xmalloc(sizeof(*file)); memset(file, 0, sizeof(*file)); file->name = file_name; file->next = file_list; @@ -81,7 +81,7 @@ int file_write_dep(const char *name) struct gstr str_new(void) { struct gstr gs; - gs.s = malloc(sizeof(char) * 64); + gs.s = xmalloc(sizeof(char) * 64); gs.len = 64; gs.max_width = 0; strcpy(gs.s, "\0"); @@ -138,3 +138,22 @@ const char *str_get(struct gstr *gs) return gs->s; } +void *xmalloc(size_t size) +{ + void *p = malloc(size); + if (p) + return p; + fprintf(stderr, "Out of memory.\n"); + exit(1); +} + +void *xcalloc(size_t nmemb, size_t size) +{ + void *p = calloc(nmemb, size); + if (p) + return p; + fprintf(stderr, "Out of memory.\n"); + exit(1); +} + + diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 00f9d3a9cf8b..6555a475453b 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -40,7 +40,7 @@ static void zconf_endfile(void); static void new_string(void) { - text = malloc(START_STRSIZE); + text = xmalloc(START_STRSIZE); text_asize = START_STRSIZE; text_size = 0; *text = 0; @@ -62,7 +62,7 @@ static void append_string(const char *str, int size) static void alloc_string(const char *str, int size) { - text = malloc(size + 1); + text = xmalloc(size + 1); memcpy(text, str, size); text[size] = 0; } @@ -288,7 +288,7 @@ void zconf_initscan(const char *name) exit(1); } - current_buf = malloc(sizeof(*current_buf)); + current_buf = xmalloc(sizeof(*current_buf)); memset(current_buf, 0, sizeof(*current_buf)); current_file = file_lookup(name); @@ -299,7 +299,7 @@ void zconf_nextfile(const char *name) { struct file *iter; struct file *file = file_lookup(name); - struct buffer *buf = malloc(sizeof(*buf)); + struct buffer *buf = xmalloc(sizeof(*buf)); memset(buf, 0, sizeof(*buf)); current_buf->state = YY_CURRENT_BUFFER; diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped index c32b1a49f5a3..a0521aa5974b 100644 --- a/scripts/kconfig/zconf.lex.c_shipped +++ b/scripts/kconfig/zconf.lex.c_shipped @@ -802,7 +802,7 @@ static void zconf_endfile(void); static void new_string(void) { - text = malloc(START_STRSIZE); + text = xmalloc(START_STRSIZE); text_asize = START_STRSIZE; text_size = 0; *text = 0; @@ -824,7 +824,7 @@ static void append_string(const char *str, int size) static void alloc_string(const char *str, int size) { - text = malloc(size + 1); + text = xmalloc(size + 1); memcpy(text, str, size); text[size] = 0; } @@ -2343,7 +2343,7 @@ void zconf_initscan(const char *name) exit(1); } - current_buf = malloc(sizeof(*current_buf)); + current_buf = xmalloc(sizeof(*current_buf)); memset(current_buf, 0, sizeof(*current_buf)); current_file = file_lookup(name); @@ -2354,7 +2354,7 @@ void zconf_nextfile(const char *name) { struct file *iter; struct file *file = file_lookup(name); - struct buffer *buf = malloc(sizeof(*buf)); + struct buffer *buf = xmalloc(sizeof(*buf)); memset(buf, 0, sizeof(*buf)); current_buf->state = YY_CURRENT_BUFFER; diff --git a/scripts/kernel-doc b/scripts/kernel-doc index f565536a2bef..4305b2f2ec5e 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1750,7 +1750,7 @@ sub dump_struct($$) { # strip kmemcheck_bitfield_{begin,end}.*; $members =~ s/kmemcheck_bitfield_.*?;//gos; # strip attributes - $members =~ s/__aligned\s*\(\d+\)//gos; + $members =~ s/__aligned\s*\(.+\)//gos; create_parameterlist($members, ';', $file); check_sections($file, $declaration_name, "struct", $sectcheck, $struct_actual, $nested); diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index b3d907eb93a9..3d569d6022c2 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -132,7 +132,14 @@ if [ "$1" = "clean" ]; then fi # We need access to CONFIG_ symbols -. ./.config +case "${KCONFIG_CONFIG}" in +*/*) + . "${KCONFIG_CONFIG}" + ;; +*) + # Force using a file from the current directory + . "./${KCONFIG_CONFIG}" +esac #link vmlinux.o info LD vmlinux.o diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore index e9b7abe7b95b..33bae0df4de5 100644 --- a/scripts/mod/.gitignore +++ b/scripts/mod/.gitignore @@ -1,4 +1,5 @@ elfconfig.h mk_elfconfig modpost +devicetable-offsets.h diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index ff954f8168c1..9415b5663364 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -3,9 +3,44 @@ always := $(hostprogs-y) empty.o modpost-objs := modpost.o file2alias.o sumversion.o +devicetable-offsets-file := devicetable-offsets.h + +define sed-y + "/^->/{s:->#\(.*\):/* \1 */:; \ + s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ + s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ + s:->::; p;}" +endef + +quiet_cmd_offsets = GEN $@ +define cmd_offsets + (set -e; \ + echo "#ifndef __DEVICEVTABLE_OFFSETS_H__"; \ + echo "#define __DEVICEVTABLE_OFFSETS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef + +# We use internal kbuild rules to avoid the "is up to date" message from make +scripts/mod/devicetable-offsets.s: scripts/mod/devicetable-offsets.c FORCE + $(Q)mkdir -p $(dir $@) + $(call if_changed_dep,cc_s_c) + +$(obj)/$(devicetable-offsets-file): scripts/mod/devicetable-offsets.s + $(call cmd,offsets) + # dependencies on generated files need to be listed explicitly $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h +$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file) quiet_cmd_elfconfig = MKELF $@ cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c new file mode 100644 index 000000000000..b45260bfeaa0 --- /dev/null +++ b/scripts/mod/devicetable-offsets.c @@ -0,0 +1,178 @@ +#include <linux/kbuild.h> +#include <linux/mod_devicetable.h> + +#define DEVID(devid) DEFINE(SIZE_##devid, sizeof(struct devid)) +#define DEVID_FIELD(devid, field) \ + DEFINE(OFF_##devid##_##field, offsetof(struct devid, field)) + +int main(void) +{ + DEVID(usb_device_id); + DEVID_FIELD(usb_device_id, match_flags); + DEVID_FIELD(usb_device_id, idVendor); + DEVID_FIELD(usb_device_id, idProduct); + DEVID_FIELD(usb_device_id, bcdDevice_lo); + DEVID_FIELD(usb_device_id, bcdDevice_hi); + DEVID_FIELD(usb_device_id, bDeviceClass); + DEVID_FIELD(usb_device_id, bDeviceSubClass); + DEVID_FIELD(usb_device_id, bDeviceProtocol); + DEVID_FIELD(usb_device_id, bInterfaceClass); + DEVID_FIELD(usb_device_id, bInterfaceSubClass); + DEVID_FIELD(usb_device_id, bInterfaceProtocol); + DEVID_FIELD(usb_device_id, bInterfaceNumber); + + DEVID(hid_device_id); + DEVID_FIELD(hid_device_id, bus); + DEVID_FIELD(hid_device_id, group); + DEVID_FIELD(hid_device_id, vendor); + DEVID_FIELD(hid_device_id, product); + + DEVID(ieee1394_device_id); + DEVID_FIELD(ieee1394_device_id, match_flags); + DEVID_FIELD(ieee1394_device_id, vendor_id); + DEVID_FIELD(ieee1394_device_id, model_id); + DEVID_FIELD(ieee1394_device_id, specifier_id); + DEVID_FIELD(ieee1394_device_id, version); + + DEVID(pci_device_id); + DEVID_FIELD(pci_device_id, vendor); + DEVID_FIELD(pci_device_id, device); + DEVID_FIELD(pci_device_id, subvendor); + DEVID_FIELD(pci_device_id, subdevice); + DEVID_FIELD(pci_device_id, class); + DEVID_FIELD(pci_device_id, class_mask); + + DEVID(ccw_device_id); + DEVID_FIELD(ccw_device_id, match_flags); + DEVID_FIELD(ccw_device_id, cu_type); + DEVID_FIELD(ccw_device_id, cu_model); + DEVID_FIELD(ccw_device_id, dev_type); + DEVID_FIELD(ccw_device_id, dev_model); + + DEVID(ap_device_id); + DEVID_FIELD(ap_device_id, dev_type); + + DEVID(css_device_id); + DEVID_FIELD(css_device_id, type); + + DEVID(serio_device_id); + DEVID_FIELD(serio_device_id, type); + DEVID_FIELD(serio_device_id, proto); + DEVID_FIELD(serio_device_id, id); + DEVID_FIELD(serio_device_id, extra); + + DEVID(acpi_device_id); + DEVID_FIELD(acpi_device_id, id); + + DEVID(pnp_device_id); + DEVID_FIELD(pnp_device_id, id); + + DEVID(pnp_card_device_id); + DEVID_FIELD(pnp_card_device_id, devs); + + DEVID(pcmcia_device_id); + DEVID_FIELD(pcmcia_device_id, match_flags); + DEVID_FIELD(pcmcia_device_id, manf_id); + DEVID_FIELD(pcmcia_device_id, card_id); + DEVID_FIELD(pcmcia_device_id, func_id); + DEVID_FIELD(pcmcia_device_id, function); + DEVID_FIELD(pcmcia_device_id, device_no); + DEVID_FIELD(pcmcia_device_id, prod_id_hash); + + DEVID(of_device_id); + DEVID_FIELD(of_device_id, name); + DEVID_FIELD(of_device_id, type); + DEVID_FIELD(of_device_id, compatible); + + DEVID(vio_device_id); + DEVID_FIELD(vio_device_id, type); + DEVID_FIELD(vio_device_id, compat); + + DEVID(input_device_id); + DEVID_FIELD(input_device_id, flags); + DEVID_FIELD(input_device_id, bustype); + DEVID_FIELD(input_device_id, vendor); + DEVID_FIELD(input_device_id, product); + DEVID_FIELD(input_device_id, version); + DEVID_FIELD(input_device_id, evbit); + DEVID_FIELD(input_device_id, keybit); + DEVID_FIELD(input_device_id, relbit); + DEVID_FIELD(input_device_id, absbit); + DEVID_FIELD(input_device_id, mscbit); + DEVID_FIELD(input_device_id, ledbit); + DEVID_FIELD(input_device_id, sndbit); + DEVID_FIELD(input_device_id, ffbit); + DEVID_FIELD(input_device_id, swbit); + + DEVID(eisa_device_id); + DEVID_FIELD(eisa_device_id, sig); + + DEVID(parisc_device_id); + DEVID_FIELD(parisc_device_id, hw_type); + DEVID_FIELD(parisc_device_id, hversion); + DEVID_FIELD(parisc_device_id, hversion_rev); + DEVID_FIELD(parisc_device_id, sversion); + + DEVID(sdio_device_id); + DEVID_FIELD(sdio_device_id, class); + DEVID_FIELD(sdio_device_id, vendor); + DEVID_FIELD(sdio_device_id, device); + + DEVID(ssb_device_id); + DEVID_FIELD(ssb_device_id, vendor); + DEVID_FIELD(ssb_device_id, coreid); + DEVID_FIELD(ssb_device_id, revision); + + DEVID(bcma_device_id); + DEVID_FIELD(bcma_device_id, manuf); + DEVID_FIELD(bcma_device_id, id); + DEVID_FIELD(bcma_device_id, rev); + DEVID_FIELD(bcma_device_id, class); + + DEVID(virtio_device_id); + DEVID_FIELD(virtio_device_id, device); + DEVID_FIELD(virtio_device_id, vendor); + + DEVID(hv_vmbus_device_id); + DEVID_FIELD(hv_vmbus_device_id, guid); + + DEVID(i2c_device_id); + DEVID_FIELD(i2c_device_id, name); + + DEVID(spi_device_id); + DEVID_FIELD(spi_device_id, name); + + DEVID(dmi_system_id); + DEVID_FIELD(dmi_system_id, matches); + + DEVID(platform_device_id); + DEVID_FIELD(platform_device_id, name); + + DEVID(mdio_device_id); + DEVID_FIELD(mdio_device_id, phy_id); + DEVID_FIELD(mdio_device_id, phy_id_mask); + + DEVID(zorro_device_id); + DEVID_FIELD(zorro_device_id, id); + + DEVID(isapnp_device_id); + DEVID_FIELD(isapnp_device_id, vendor); + DEVID_FIELD(isapnp_device_id, function); + + DEVID(ipack_device_id); + DEVID_FIELD(ipack_device_id, format); + DEVID_FIELD(ipack_device_id, vendor); + DEVID_FIELD(ipack_device_id, device); + + DEVID(amba_id); + DEVID_FIELD(amba_id, id); + DEVID_FIELD(amba_id, mask); + + DEVID(x86_cpu_id); + DEVID_FIELD(x86_cpu_id, feature); + DEVID_FIELD(x86_cpu_id, family); + DEVID_FIELD(x86_cpu_id, model); + DEVID_FIELD(x86_cpu_id, vendor); + + return 0; +} diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index df4fc23dd836..771ac17f635d 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -11,6 +11,7 @@ */ #include "modpost.h" +#include "devicetable-offsets.h" /* We use the ELF typedefs for kernel_ulong_t but bite the bullet and * use either stdint.h or inttypes.h for the rest. */ @@ -84,13 +85,25 @@ extern struct devtable *__start___devtable[], *__stop___devtable[]; # define __used __attribute__((__used__)) #endif +/* Define a variable f that holds the value of field f of struct devid + * based at address m. + */ +#define DEF_FIELD(m, devid, f) \ + typeof(((struct devid *)0)->f) f = TO_NATIVE(*(typeof(f) *)((m) + OFF_##devid##_##f)) +/* Define a variable f that holds the address of field f of struct devid + * based at address m. Due to the way typeof works, for a field of type + * T[N] the variable has type T(*)[N], _not_ T*. + */ +#define DEF_FIELD_ADDR(m, devid, f) \ + typeof(((struct devid *)0)->f) *f = ((m) + OFF_##devid##_##f) + /* Add a table entry. We test function type matches while we're here. */ #define ADD_TO_DEVTABLE(device_id, type, function) \ static struct devtable __cat(devtable,__LINE__) = { \ device_id + 0*sizeof((function)((const char *)NULL, \ - (type *)NULL, \ + (void *)NULL, \ (char *)NULL)), \ - sizeof(type), (function) }; \ + SIZE_##type, (function) }; \ static struct devtable *SECTION(__devtable) __used \ __cat(devtable_ptr,__LINE__) = &__cat(devtable,__LINE__) @@ -116,7 +129,6 @@ static inline void add_wildcard(char *str) strcat(str + len, "*"); } -unsigned int cross_build = 0; /** * Check that sizeof(device_id type) are consistent with size of section * in .o file. If in-consistent then userspace and kernel does not agree @@ -131,8 +143,6 @@ static void device_id_check(const char *modname, const char *device_id, int i; if (size % id_size || size < id_size) { - if (cross_build != 0) - return; fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo " "of the size of section __mod_%s_device_table=%lu.\n" "Fix definition of struct %s_device_id " @@ -157,17 +167,29 @@ static void device_id_check(const char *modname, const char *device_id, /* USB is special because the bcdDevice can be matched against a numeric range */ /* Looks like "usb:vNpNdNdcNdscNdpNicNiscNipNinN" */ -static void do_usb_entry(struct usb_device_id *id, +static void do_usb_entry(void *symval, unsigned int bcdDevice_initial, int bcdDevice_initial_digits, unsigned char range_lo, unsigned char range_hi, unsigned char max, struct module *mod) { char alias[500]; + DEF_FIELD(symval, usb_device_id, match_flags); + DEF_FIELD(symval, usb_device_id, idVendor); + DEF_FIELD(symval, usb_device_id, idProduct); + DEF_FIELD(symval, usb_device_id, bcdDevice_lo); + DEF_FIELD(symval, usb_device_id, bDeviceClass); + DEF_FIELD(symval, usb_device_id, bDeviceSubClass); + DEF_FIELD(symval, usb_device_id, bDeviceProtocol); + DEF_FIELD(symval, usb_device_id, bInterfaceClass); + DEF_FIELD(symval, usb_device_id, bInterfaceSubClass); + DEF_FIELD(symval, usb_device_id, bInterfaceProtocol); + DEF_FIELD(symval, usb_device_id, bInterfaceNumber); + strcpy(alias, "usb:"); - ADD(alias, "v", id->match_flags&USB_DEVICE_ID_MATCH_VENDOR, - id->idVendor); - ADD(alias, "p", id->match_flags&USB_DEVICE_ID_MATCH_PRODUCT, - id->idProduct); + ADD(alias, "v", match_flags&USB_DEVICE_ID_MATCH_VENDOR, + idVendor); + ADD(alias, "p", match_flags&USB_DEVICE_ID_MATCH_PRODUCT, + idProduct); strcat(alias, "d"); if (bcdDevice_initial_digits) @@ -190,29 +212,23 @@ static void do_usb_entry(struct usb_device_id *id, range_lo); } } - if (bcdDevice_initial_digits < (sizeof(id->bcdDevice_lo) * 2 - 1)) + if (bcdDevice_initial_digits < (sizeof(bcdDevice_lo) * 2 - 1)) strcat(alias, "*"); - ADD(alias, "dc", id->match_flags&USB_DEVICE_ID_MATCH_DEV_CLASS, - id->bDeviceClass); - ADD(alias, "dsc", - id->match_flags&USB_DEVICE_ID_MATCH_DEV_SUBCLASS, - id->bDeviceSubClass); - ADD(alias, "dp", - id->match_flags&USB_DEVICE_ID_MATCH_DEV_PROTOCOL, - id->bDeviceProtocol); - ADD(alias, "ic", - id->match_flags&USB_DEVICE_ID_MATCH_INT_CLASS, - id->bInterfaceClass); - ADD(alias, "isc", - id->match_flags&USB_DEVICE_ID_MATCH_INT_SUBCLASS, - id->bInterfaceSubClass); - ADD(alias, "ip", - id->match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL, - id->bInterfaceProtocol); - ADD(alias, "in", - id->match_flags&USB_DEVICE_ID_MATCH_INT_NUMBER, - id->bInterfaceNumber); + ADD(alias, "dc", match_flags&USB_DEVICE_ID_MATCH_DEV_CLASS, + bDeviceClass); + ADD(alias, "dsc", match_flags&USB_DEVICE_ID_MATCH_DEV_SUBCLASS, + bDeviceSubClass); + ADD(alias, "dp", match_flags&USB_DEVICE_ID_MATCH_DEV_PROTOCOL, + bDeviceProtocol); + ADD(alias, "ic", match_flags&USB_DEVICE_ID_MATCH_INT_CLASS, + bInterfaceClass); + ADD(alias, "isc", match_flags&USB_DEVICE_ID_MATCH_INT_SUBCLASS, + bInterfaceSubClass); + ADD(alias, "ip", match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL, + bInterfaceProtocol); + ADD(alias, "in", match_flags&USB_DEVICE_ID_MATCH_INT_NUMBER, + bInterfaceNumber); add_wildcard(alias); buf_printf(&mod->dev_table_buf, @@ -258,24 +274,28 @@ static unsigned int incbcd(unsigned int *bcd, return init; } -static void do_usb_entry_multi(struct usb_device_id *id, struct module *mod) +static void do_usb_entry_multi(void *symval, struct module *mod) { unsigned int devlo, devhi; unsigned char chi, clo, max; int ndigits; - id->match_flags = TO_NATIVE(id->match_flags); - id->idVendor = TO_NATIVE(id->idVendor); - id->idProduct = TO_NATIVE(id->idProduct); + DEF_FIELD(symval, usb_device_id, match_flags); + DEF_FIELD(symval, usb_device_id, idVendor); + DEF_FIELD(symval, usb_device_id, idProduct); + DEF_FIELD(symval, usb_device_id, bcdDevice_lo); + DEF_FIELD(symval, usb_device_id, bcdDevice_hi); + DEF_FIELD(symval, usb_device_id, bDeviceClass); + DEF_FIELD(symval, usb_device_id, bInterfaceClass); - devlo = id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO ? - TO_NATIVE(id->bcdDevice_lo) : 0x0U; - devhi = id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI ? - TO_NATIVE(id->bcdDevice_hi) : ~0x0U; + devlo = match_flags & USB_DEVICE_ID_MATCH_DEV_LO ? + bcdDevice_lo : 0x0U; + devhi = match_flags & USB_DEVICE_ID_MATCH_DEV_HI ? + bcdDevice_hi : ~0x0U; /* Figure out if this entry is in bcd or hex format */ max = 0x9; /* Default to decimal format */ - for (ndigits = 0 ; ndigits < sizeof(id->bcdDevice_lo) * 2 ; ndigits++) { + for (ndigits = 0 ; ndigits < sizeof(bcdDevice_lo) * 2 ; ndigits++) { clo = (devlo >> (ndigits << 2)) & 0xf; chi = ((devhi > 0x9999 ? 0x9999 : devhi) >> (ndigits << 2)) & 0xf; if (clo > max || chi > max) { @@ -288,11 +308,11 @@ static void do_usb_entry_multi(struct usb_device_id *id, struct module *mod) * Some modules (visor) have empty slots as placeholder for * run-time specification that results in catch-all alias */ - if (!(id->idVendor | id->idProduct | id->bDeviceClass | id->bInterfaceClass)) + if (!(idVendor | idProduct | bDeviceClass | bInterfaceClass)) return; /* Convert numeric bcdDevice range into fnmatch-able pattern(s) */ - for (ndigits = sizeof(id->bcdDevice_lo) * 2 - 1; devlo <= devhi; ndigits--) { + for (ndigits = sizeof(bcdDevice_lo) * 2 - 1; devlo <= devhi; ndigits--) { clo = devlo & 0xf; chi = devhi & 0xf; if (chi > max) /* If we are in bcd mode, truncate if necessary */ @@ -301,20 +321,20 @@ static void do_usb_entry_multi(struct usb_device_id *id, struct module *mod) devhi >>= 4; if (devlo == devhi || !ndigits) { - do_usb_entry(id, devlo, ndigits, clo, chi, max, mod); + do_usb_entry(symval, devlo, ndigits, clo, chi, max, mod); break; } if (clo > 0x0) - do_usb_entry(id, + do_usb_entry(symval, incbcd(&devlo, 1, max, - sizeof(id->bcdDevice_lo) * 2), + sizeof(bcdDevice_lo) * 2), ndigits, clo, max, max, mod); if (chi < max) - do_usb_entry(id, + do_usb_entry(symval, incbcd(&devhi, -1, max, - sizeof(id->bcdDevice_lo) * 2), + sizeof(bcdDevice_lo) * 2), ndigits, 0x0, chi, max, mod); } } @@ -323,7 +343,7 @@ static void do_usb_table(void *symval, unsigned long size, struct module *mod) { unsigned int i; - const unsigned long id_size = sizeof(struct usb_device_id); + const unsigned long id_size = SIZE_usb_device_id; device_id_check(mod->name, "usb", size, id_size, symval); @@ -336,81 +356,81 @@ static void do_usb_table(void *symval, unsigned long size, /* Looks like: hid:bNvNpN */ static int do_hid_entry(const char *filename, - struct hid_device_id *id, char *alias) + void *symval, char *alias) { - id->bus = TO_NATIVE(id->bus); - id->group = TO_NATIVE(id->group); - id->vendor = TO_NATIVE(id->vendor); - id->product = TO_NATIVE(id->product); + DEF_FIELD(symval, hid_device_id, bus); + DEF_FIELD(symval, hid_device_id, group); + DEF_FIELD(symval, hid_device_id, vendor); + DEF_FIELD(symval, hid_device_id, product); sprintf(alias, "hid:"); - ADD(alias, "b", id->bus != HID_BUS_ANY, id->bus); - ADD(alias, "g", id->group != HID_GROUP_ANY, id->group); - ADD(alias, "v", id->vendor != HID_ANY_ID, id->vendor); - ADD(alias, "p", id->product != HID_ANY_ID, id->product); + ADD(alias, "b", bus != HID_BUS_ANY, bus); + ADD(alias, "g", group != HID_GROUP_ANY, group); + ADD(alias, "v", vendor != HID_ANY_ID, vendor); + ADD(alias, "p", product != HID_ANY_ID, product); return 1; } -ADD_TO_DEVTABLE("hid", struct hid_device_id, do_hid_entry); +ADD_TO_DEVTABLE("hid", hid_device_id, do_hid_entry); /* Looks like: ieee1394:venNmoNspNverN */ static int do_ieee1394_entry(const char *filename, - struct ieee1394_device_id *id, char *alias) + void *symval, char *alias) { - id->match_flags = TO_NATIVE(id->match_flags); - id->vendor_id = TO_NATIVE(id->vendor_id); - id->model_id = TO_NATIVE(id->model_id); - id->specifier_id = TO_NATIVE(id->specifier_id); - id->version = TO_NATIVE(id->version); + DEF_FIELD(symval, ieee1394_device_id, match_flags); + DEF_FIELD(symval, ieee1394_device_id, vendor_id); + DEF_FIELD(symval, ieee1394_device_id, model_id); + DEF_FIELD(symval, ieee1394_device_id, specifier_id); + DEF_FIELD(symval, ieee1394_device_id, version); strcpy(alias, "ieee1394:"); - ADD(alias, "ven", id->match_flags & IEEE1394_MATCH_VENDOR_ID, - id->vendor_id); - ADD(alias, "mo", id->match_flags & IEEE1394_MATCH_MODEL_ID, - id->model_id); - ADD(alias, "sp", id->match_flags & IEEE1394_MATCH_SPECIFIER_ID, - id->specifier_id); - ADD(alias, "ver", id->match_flags & IEEE1394_MATCH_VERSION, - id->version); + ADD(alias, "ven", match_flags & IEEE1394_MATCH_VENDOR_ID, + vendor_id); + ADD(alias, "mo", match_flags & IEEE1394_MATCH_MODEL_ID, + model_id); + ADD(alias, "sp", match_flags & IEEE1394_MATCH_SPECIFIER_ID, + specifier_id); + ADD(alias, "ver", match_flags & IEEE1394_MATCH_VERSION, + version); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ieee1394", struct ieee1394_device_id, do_ieee1394_entry); +ADD_TO_DEVTABLE("ieee1394", ieee1394_device_id, do_ieee1394_entry); /* Looks like: pci:vNdNsvNsdNbcNscNiN. */ static int do_pci_entry(const char *filename, - struct pci_device_id *id, char *alias) + void *symval, char *alias) { /* Class field can be divided into these three. */ unsigned char baseclass, subclass, interface, baseclass_mask, subclass_mask, interface_mask; - id->vendor = TO_NATIVE(id->vendor); - id->device = TO_NATIVE(id->device); - id->subvendor = TO_NATIVE(id->subvendor); - id->subdevice = TO_NATIVE(id->subdevice); - id->class = TO_NATIVE(id->class); - id->class_mask = TO_NATIVE(id->class_mask); + DEF_FIELD(symval, pci_device_id, vendor); + DEF_FIELD(symval, pci_device_id, device); + DEF_FIELD(symval, pci_device_id, subvendor); + DEF_FIELD(symval, pci_device_id, subdevice); + DEF_FIELD(symval, pci_device_id, class); + DEF_FIELD(symval, pci_device_id, class_mask); strcpy(alias, "pci:"); - ADD(alias, "v", id->vendor != PCI_ANY_ID, id->vendor); - ADD(alias, "d", id->device != PCI_ANY_ID, id->device); - ADD(alias, "sv", id->subvendor != PCI_ANY_ID, id->subvendor); - ADD(alias, "sd", id->subdevice != PCI_ANY_ID, id->subdevice); - - baseclass = (id->class) >> 16; - baseclass_mask = (id->class_mask) >> 16; - subclass = (id->class) >> 8; - subclass_mask = (id->class_mask) >> 8; - interface = id->class; - interface_mask = id->class_mask; + ADD(alias, "v", vendor != PCI_ANY_ID, vendor); + ADD(alias, "d", device != PCI_ANY_ID, device); + ADD(alias, "sv", subvendor != PCI_ANY_ID, subvendor); + ADD(alias, "sd", subdevice != PCI_ANY_ID, subdevice); + + baseclass = (class) >> 16; + baseclass_mask = (class_mask) >> 16; + subclass = (class) >> 8; + subclass_mask = (class_mask) >> 8; + interface = class; + interface_mask = class_mask; if ((baseclass_mask != 0 && baseclass_mask != 0xFF) || (subclass_mask != 0 && subclass_mask != 0xFF) || (interface_mask != 0 && interface_mask != 0xFF)) { warn("Can't handle masks in %s:%04X\n", - filename, id->class_mask); + filename, class_mask); return 0; } @@ -420,101 +440,105 @@ static int do_pci_entry(const char *filename, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("pci", struct pci_device_id, do_pci_entry); +ADD_TO_DEVTABLE("pci", pci_device_id, do_pci_entry); /* looks like: "ccw:tNmNdtNdmN" */ static int do_ccw_entry(const char *filename, - struct ccw_device_id *id, char *alias) + void *symval, char *alias) { - id->match_flags = TO_NATIVE(id->match_flags); - id->cu_type = TO_NATIVE(id->cu_type); - id->cu_model = TO_NATIVE(id->cu_model); - id->dev_type = TO_NATIVE(id->dev_type); - id->dev_model = TO_NATIVE(id->dev_model); + DEF_FIELD(symval, ccw_device_id, match_flags); + DEF_FIELD(symval, ccw_device_id, cu_type); + DEF_FIELD(symval, ccw_device_id, cu_model); + DEF_FIELD(symval, ccw_device_id, dev_type); + DEF_FIELD(symval, ccw_device_id, dev_model); strcpy(alias, "ccw:"); - ADD(alias, "t", id->match_flags&CCW_DEVICE_ID_MATCH_CU_TYPE, - id->cu_type); - ADD(alias, "m", id->match_flags&CCW_DEVICE_ID_MATCH_CU_MODEL, - id->cu_model); - ADD(alias, "dt", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_TYPE, - id->dev_type); - ADD(alias, "dm", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL, - id->dev_model); + ADD(alias, "t", match_flags&CCW_DEVICE_ID_MATCH_CU_TYPE, + cu_type); + ADD(alias, "m", match_flags&CCW_DEVICE_ID_MATCH_CU_MODEL, + cu_model); + ADD(alias, "dt", match_flags&CCW_DEVICE_ID_MATCH_DEVICE_TYPE, + dev_type); + ADD(alias, "dm", match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL, + dev_model); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ccw", struct ccw_device_id, do_ccw_entry); +ADD_TO_DEVTABLE("ccw", ccw_device_id, do_ccw_entry); /* looks like: "ap:tN" */ static int do_ap_entry(const char *filename, - struct ap_device_id *id, char *alias) + void *symval, char *alias) { - sprintf(alias, "ap:t%02X*", id->dev_type); + DEF_FIELD(symval, ap_device_id, dev_type); + + sprintf(alias, "ap:t%02X*", dev_type); return 1; } -ADD_TO_DEVTABLE("ap", struct ap_device_id, do_ap_entry); +ADD_TO_DEVTABLE("ap", ap_device_id, do_ap_entry); /* looks like: "css:tN" */ static int do_css_entry(const char *filename, - struct css_device_id *id, char *alias) + void *symval, char *alias) { - sprintf(alias, "css:t%01X", id->type); + DEF_FIELD(symval, css_device_id, type); + + sprintf(alias, "css:t%01X", type); return 1; } -ADD_TO_DEVTABLE("css", struct css_device_id, do_css_entry); +ADD_TO_DEVTABLE("css", css_device_id, do_css_entry); /* Looks like: "serio:tyNprNidNexN" */ static int do_serio_entry(const char *filename, - struct serio_device_id *id, char *alias) + void *symval, char *alias) { - id->type = TO_NATIVE(id->type); - id->proto = TO_NATIVE(id->proto); - id->id = TO_NATIVE(id->id); - id->extra = TO_NATIVE(id->extra); + DEF_FIELD(symval, serio_device_id, type); + DEF_FIELD(symval, serio_device_id, proto); + DEF_FIELD(symval, serio_device_id, id); + DEF_FIELD(symval, serio_device_id, extra); strcpy(alias, "serio:"); - ADD(alias, "ty", id->type != SERIO_ANY, id->type); - ADD(alias, "pr", id->proto != SERIO_ANY, id->proto); - ADD(alias, "id", id->id != SERIO_ANY, id->id); - ADD(alias, "ex", id->extra != SERIO_ANY, id->extra); + ADD(alias, "ty", type != SERIO_ANY, type); + ADD(alias, "pr", proto != SERIO_ANY, proto); + ADD(alias, "id", id != SERIO_ANY, id); + ADD(alias, "ex", extra != SERIO_ANY, extra); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("serio", struct serio_device_id, do_serio_entry); +ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry); /* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */ static int do_acpi_entry(const char *filename, - struct acpi_device_id *id, char *alias) + void *symval, char *alias) { - sprintf(alias, "acpi*:%s:*", id->id); + DEF_FIELD_ADDR(symval, acpi_device_id, id); + sprintf(alias, "acpi*:%s:*", *id); return 1; } -ADD_TO_DEVTABLE("acpi", struct acpi_device_id, do_acpi_entry); +ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry); /* looks like: "pnp:dD" */ static void do_pnp_device_entry(void *symval, unsigned long size, struct module *mod) { - const unsigned long id_size = sizeof(struct pnp_device_id); + const unsigned long id_size = SIZE_pnp_device_id; const unsigned int count = (size / id_size)-1; - const struct pnp_device_id *devs = symval; unsigned int i; device_id_check(mod->name, "pnp", size, id_size, symval); for (i = 0; i < count; i++) { - const char *id = (char *)devs[i].id; - char acpi_id[sizeof(devs[0].id)]; + DEF_FIELD_ADDR(symval + i*id_size, pnp_device_id, id); + char acpi_id[sizeof(*id)]; int j; buf_printf(&mod->dev_table_buf, - "MODULE_ALIAS(\"pnp:d%s*\");\n", id); + "MODULE_ALIAS(\"pnp:d%s*\");\n", *id); /* fix broken pnp bus lowercasing */ for (j = 0; j < sizeof(acpi_id); j++) - acpi_id[j] = toupper(id[j]); + acpi_id[j] = toupper((*id)[j]); buf_printf(&mod->dev_table_buf, "MODULE_ALIAS(\"acpi*:%s:*\");\n", acpi_id); } @@ -524,19 +548,18 @@ static void do_pnp_device_entry(void *symval, unsigned long size, static void do_pnp_card_entries(void *symval, unsigned long size, struct module *mod) { - const unsigned long id_size = sizeof(struct pnp_card_device_id); + const unsigned long id_size = SIZE_pnp_card_device_id; const unsigned int count = (size / id_size)-1; - const struct pnp_card_device_id *cards = symval; unsigned int i; device_id_check(mod->name, "pnp", size, id_size, symval); for (i = 0; i < count; i++) { unsigned int j; - const struct pnp_card_device_id *card = &cards[i]; + DEF_FIELD_ADDR(symval + i*id_size, pnp_card_device_id, devs); for (j = 0; j < PNP_MAX_DEVICES; j++) { - const char *id = (char *)card->devs[j].id; + const char *id = (char *)(*devs)[j].id; int i2, j2; int dup = 0; @@ -545,10 +568,10 @@ static void do_pnp_card_entries(void *symval, unsigned long size, /* find duplicate, already added value */ for (i2 = 0; i2 < i && !dup; i2++) { - const struct pnp_card_device_id *card2 = &cards[i2]; + DEF_FIELD_ADDR(symval + i2*id_size, pnp_card_device_id, devs); for (j2 = 0; j2 < PNP_MAX_DEVICES; j2++) { - const char *id2 = (char *)card2->devs[j2].id; + const char *id2 = (char *)(*devs)[j2].id; if (!id2[0]) break; @@ -562,7 +585,7 @@ static void do_pnp_card_entries(void *symval, unsigned long size, /* add an individual alias for every device entry */ if (!dup) { - char acpi_id[sizeof(card->devs[0].id)]; + char acpi_id[PNP_ID_LEN]; int k; buf_printf(&mod->dev_table_buf, @@ -580,54 +603,58 @@ static void do_pnp_card_entries(void *symval, unsigned long size, /* Looks like: pcmcia:mNcNfNfnNpfnNvaNvbNvcNvdN. */ static int do_pcmcia_entry(const char *filename, - struct pcmcia_device_id *id, char *alias) + void *symval, char *alias) { unsigned int i; - - id->match_flags = TO_NATIVE(id->match_flags); - id->manf_id = TO_NATIVE(id->manf_id); - id->card_id = TO_NATIVE(id->card_id); - id->func_id = TO_NATIVE(id->func_id); - id->function = TO_NATIVE(id->function); - id->device_no = TO_NATIVE(id->device_no); + DEF_FIELD(symval, pcmcia_device_id, match_flags); + DEF_FIELD(symval, pcmcia_device_id, manf_id); + DEF_FIELD(symval, pcmcia_device_id, card_id); + DEF_FIELD(symval, pcmcia_device_id, func_id); + DEF_FIELD(symval, pcmcia_device_id, function); + DEF_FIELD(symval, pcmcia_device_id, device_no); + DEF_FIELD_ADDR(symval, pcmcia_device_id, prod_id_hash); for (i=0; i<4; i++) { - id->prod_id_hash[i] = TO_NATIVE(id->prod_id_hash[i]); - } - - strcpy(alias, "pcmcia:"); - ADD(alias, "m", id->match_flags & PCMCIA_DEV_ID_MATCH_MANF_ID, - id->manf_id); - ADD(alias, "c", id->match_flags & PCMCIA_DEV_ID_MATCH_CARD_ID, - id->card_id); - ADD(alias, "f", id->match_flags & PCMCIA_DEV_ID_MATCH_FUNC_ID, - id->func_id); - ADD(alias, "fn", id->match_flags & PCMCIA_DEV_ID_MATCH_FUNCTION, - id->function); - ADD(alias, "pfn", id->match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO, - id->device_no); - ADD(alias, "pa", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1, id->prod_id_hash[0]); - ADD(alias, "pb", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2, id->prod_id_hash[1]); - ADD(alias, "pc", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, id->prod_id_hash[2]); - ADD(alias, "pd", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, id->prod_id_hash[3]); + (*prod_id_hash)[i] = TO_NATIVE((*prod_id_hash)[i]); + } + + strcpy(alias, "pcmcia:"); + ADD(alias, "m", match_flags & PCMCIA_DEV_ID_MATCH_MANF_ID, + manf_id); + ADD(alias, "c", match_flags & PCMCIA_DEV_ID_MATCH_CARD_ID, + card_id); + ADD(alias, "f", match_flags & PCMCIA_DEV_ID_MATCH_FUNC_ID, + func_id); + ADD(alias, "fn", match_flags & PCMCIA_DEV_ID_MATCH_FUNCTION, + function); + ADD(alias, "pfn", match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO, + device_no); + ADD(alias, "pa", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1, (*prod_id_hash)[0]); + ADD(alias, "pb", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2, (*prod_id_hash)[1]); + ADD(alias, "pc", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, (*prod_id_hash)[2]); + ADD(alias, "pd", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, (*prod_id_hash)[3]); add_wildcard(alias); - return 1; + return 1; } -ADD_TO_DEVTABLE("pcmcia", struct pcmcia_device_id, do_pcmcia_entry); +ADD_TO_DEVTABLE("pcmcia", pcmcia_device_id, do_pcmcia_entry); -static int do_of_entry (const char *filename, struct of_device_id *of, char *alias) +static int do_of_entry (const char *filename, void *symval, char *alias) { int len; char *tmp; + DEF_FIELD_ADDR(symval, of_device_id, name); + DEF_FIELD_ADDR(symval, of_device_id, type); + DEF_FIELD_ADDR(symval, of_device_id, compatible); + len = sprintf (alias, "of:N%sT%s", - of->name[0] ? of->name : "*", - of->type[0] ? of->type : "*"); + (*name)[0] ? *name : "*", + (*type)[0] ? *type : "*"); - if (of->compatible[0]) + if (compatible[0]) sprintf (&alias[len], "%sC%s", - of->type[0] ? "*" : "", - of->compatible); + (*type)[0] ? "*" : "", + *compatible); /* Replace all whitespace with underscores */ for (tmp = alias; tmp && *tmp; tmp++) @@ -637,15 +664,17 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("of", struct of_device_id, do_of_entry); +ADD_TO_DEVTABLE("of", of_device_id, do_of_entry); -static int do_vio_entry(const char *filename, struct vio_device_id *vio, +static int do_vio_entry(const char *filename, void *symval, char *alias) { char *tmp; + DEF_FIELD_ADDR(symval, vio_device_id, type); + DEF_FIELD_ADDR(symval, vio_device_id, compat); - sprintf(alias, "vio:T%sS%s", vio->type[0] ? vio->type : "*", - vio->compat[0] ? vio->compat : "*"); + sprintf(alias, "vio:T%sS%s", (*type)[0] ? *type : "*", + (*compat)[0] ? *compat : "*"); /* Replace all whitespace with underscores */ for (tmp = alias; tmp && *tmp; tmp++) @@ -655,7 +684,7 @@ static int do_vio_entry(const char *filename, struct vio_device_id *vio, add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("vio", struct vio_device_id, do_vio_entry); +ADD_TO_DEVTABLE("vio", vio_device_id, do_vio_entry); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -664,154 +693,172 @@ static void do_input(char *alias, { unsigned int i; + for (i = min / BITS_PER_LONG; i < max / BITS_PER_LONG + 1; i++) + arr[i] = TO_NATIVE(arr[i]); for (i = min; i < max; i++) if (arr[i / BITS_PER_LONG] & (1L << (i%BITS_PER_LONG))) sprintf(alias + strlen(alias), "%X,*", i); } /* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */ -static int do_input_entry(const char *filename, struct input_device_id *id, +static int do_input_entry(const char *filename, void *symval, char *alias) { + DEF_FIELD(symval, input_device_id, flags); + DEF_FIELD(symval, input_device_id, bustype); + DEF_FIELD(symval, input_device_id, vendor); + DEF_FIELD(symval, input_device_id, product); + DEF_FIELD(symval, input_device_id, version); + DEF_FIELD_ADDR(symval, input_device_id, evbit); + DEF_FIELD_ADDR(symval, input_device_id, keybit); + DEF_FIELD_ADDR(symval, input_device_id, relbit); + DEF_FIELD_ADDR(symval, input_device_id, absbit); + DEF_FIELD_ADDR(symval, input_device_id, mscbit); + DEF_FIELD_ADDR(symval, input_device_id, ledbit); + DEF_FIELD_ADDR(symval, input_device_id, sndbit); + DEF_FIELD_ADDR(symval, input_device_id, ffbit); + DEF_FIELD_ADDR(symval, input_device_id, swbit); + sprintf(alias, "input:"); - ADD(alias, "b", id->flags & INPUT_DEVICE_ID_MATCH_BUS, id->bustype); - ADD(alias, "v", id->flags & INPUT_DEVICE_ID_MATCH_VENDOR, id->vendor); - ADD(alias, "p", id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT, id->product); - ADD(alias, "e", id->flags & INPUT_DEVICE_ID_MATCH_VERSION, id->version); + ADD(alias, "b", flags & INPUT_DEVICE_ID_MATCH_BUS, bustype); + ADD(alias, "v", flags & INPUT_DEVICE_ID_MATCH_VENDOR, vendor); + ADD(alias, "p", flags & INPUT_DEVICE_ID_MATCH_PRODUCT, product); + ADD(alias, "e", flags & INPUT_DEVICE_ID_MATCH_VERSION, version); sprintf(alias + strlen(alias), "-e*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_EVBIT) - do_input(alias, id->evbit, 0, INPUT_DEVICE_ID_EV_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_EVBIT) + do_input(alias, *evbit, 0, INPUT_DEVICE_ID_EV_MAX); sprintf(alias + strlen(alias), "k*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_KEYBIT) - do_input(alias, id->keybit, + if (flags & INPUT_DEVICE_ID_MATCH_KEYBIT) + do_input(alias, *keybit, INPUT_DEVICE_ID_KEY_MIN_INTERESTING, INPUT_DEVICE_ID_KEY_MAX); sprintf(alias + strlen(alias), "r*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_RELBIT) - do_input(alias, id->relbit, 0, INPUT_DEVICE_ID_REL_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_RELBIT) + do_input(alias, *relbit, 0, INPUT_DEVICE_ID_REL_MAX); sprintf(alias + strlen(alias), "a*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_ABSBIT) - do_input(alias, id->absbit, 0, INPUT_DEVICE_ID_ABS_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_ABSBIT) + do_input(alias, *absbit, 0, INPUT_DEVICE_ID_ABS_MAX); sprintf(alias + strlen(alias), "m*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_MSCIT) - do_input(alias, id->mscbit, 0, INPUT_DEVICE_ID_MSC_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_MSCIT) + do_input(alias, *mscbit, 0, INPUT_DEVICE_ID_MSC_MAX); sprintf(alias + strlen(alias), "l*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_LEDBIT) - do_input(alias, id->ledbit, 0, INPUT_DEVICE_ID_LED_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_LEDBIT) + do_input(alias, *ledbit, 0, INPUT_DEVICE_ID_LED_MAX); sprintf(alias + strlen(alias), "s*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_SNDBIT) - do_input(alias, id->sndbit, 0, INPUT_DEVICE_ID_SND_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_SNDBIT) + do_input(alias, *sndbit, 0, INPUT_DEVICE_ID_SND_MAX); sprintf(alias + strlen(alias), "f*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_FFBIT) - do_input(alias, id->ffbit, 0, INPUT_DEVICE_ID_FF_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_FFBIT) + do_input(alias, *ffbit, 0, INPUT_DEVICE_ID_FF_MAX); sprintf(alias + strlen(alias), "w*"); - if (id->flags & INPUT_DEVICE_ID_MATCH_SWBIT) - do_input(alias, id->swbit, 0, INPUT_DEVICE_ID_SW_MAX); + if (flags & INPUT_DEVICE_ID_MATCH_SWBIT) + do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); return 1; } -ADD_TO_DEVTABLE("input", struct input_device_id, do_input_entry); +ADD_TO_DEVTABLE("input", input_device_id, do_input_entry); -static int do_eisa_entry(const char *filename, struct eisa_device_id *eisa, +static int do_eisa_entry(const char *filename, void *symval, char *alias) { - if (eisa->sig[0]) - sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig); + DEF_FIELD_ADDR(symval, eisa_device_id, sig); + if (sig[0]) + sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", *sig); else strcat(alias, "*"); return 1; } -ADD_TO_DEVTABLE("eisa", struct eisa_device_id, do_eisa_entry); +ADD_TO_DEVTABLE("eisa", eisa_device_id, do_eisa_entry); /* Looks like: parisc:tNhvNrevNsvN */ -static int do_parisc_entry(const char *filename, struct parisc_device_id *id, +static int do_parisc_entry(const char *filename, void *symval, char *alias) { - id->hw_type = TO_NATIVE(id->hw_type); - id->hversion = TO_NATIVE(id->hversion); - id->hversion_rev = TO_NATIVE(id->hversion_rev); - id->sversion = TO_NATIVE(id->sversion); + DEF_FIELD(symval, parisc_device_id, hw_type); + DEF_FIELD(symval, parisc_device_id, hversion); + DEF_FIELD(symval, parisc_device_id, hversion_rev); + DEF_FIELD(symval, parisc_device_id, sversion); strcpy(alias, "parisc:"); - ADD(alias, "t", id->hw_type != PA_HWTYPE_ANY_ID, id->hw_type); - ADD(alias, "hv", id->hversion != PA_HVERSION_ANY_ID, id->hversion); - ADD(alias, "rev", id->hversion_rev != PA_HVERSION_REV_ANY_ID, id->hversion_rev); - ADD(alias, "sv", id->sversion != PA_SVERSION_ANY_ID, id->sversion); + ADD(alias, "t", hw_type != PA_HWTYPE_ANY_ID, hw_type); + ADD(alias, "hv", hversion != PA_HVERSION_ANY_ID, hversion); + ADD(alias, "rev", hversion_rev != PA_HVERSION_REV_ANY_ID, hversion_rev); + ADD(alias, "sv", sversion != PA_SVERSION_ANY_ID, sversion); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("parisc", struct parisc_device_id, do_parisc_entry); +ADD_TO_DEVTABLE("parisc", parisc_device_id, do_parisc_entry); /* Looks like: sdio:cNvNdN. */ static int do_sdio_entry(const char *filename, - struct sdio_device_id *id, char *alias) + void *symval, char *alias) { - id->class = TO_NATIVE(id->class); - id->vendor = TO_NATIVE(id->vendor); - id->device = TO_NATIVE(id->device); + DEF_FIELD(symval, sdio_device_id, class); + DEF_FIELD(symval, sdio_device_id, vendor); + DEF_FIELD(symval, sdio_device_id, device); strcpy(alias, "sdio:"); - ADD(alias, "c", id->class != (__u8)SDIO_ANY_ID, id->class); - ADD(alias, "v", id->vendor != (__u16)SDIO_ANY_ID, id->vendor); - ADD(alias, "d", id->device != (__u16)SDIO_ANY_ID, id->device); + ADD(alias, "c", class != (__u8)SDIO_ANY_ID, class); + ADD(alias, "v", vendor != (__u16)SDIO_ANY_ID, vendor); + ADD(alias, "d", device != (__u16)SDIO_ANY_ID, device); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("sdio", struct sdio_device_id, do_sdio_entry); +ADD_TO_DEVTABLE("sdio", sdio_device_id, do_sdio_entry); /* Looks like: ssb:vNidNrevN. */ static int do_ssb_entry(const char *filename, - struct ssb_device_id *id, char *alias) + void *symval, char *alias) { - id->vendor = TO_NATIVE(id->vendor); - id->coreid = TO_NATIVE(id->coreid); - id->revision = TO_NATIVE(id->revision); + DEF_FIELD(symval, ssb_device_id, vendor); + DEF_FIELD(symval, ssb_device_id, coreid); + DEF_FIELD(symval, ssb_device_id, revision); strcpy(alias, "ssb:"); - ADD(alias, "v", id->vendor != SSB_ANY_VENDOR, id->vendor); - ADD(alias, "id", id->coreid != SSB_ANY_ID, id->coreid); - ADD(alias, "rev", id->revision != SSB_ANY_REV, id->revision); + ADD(alias, "v", vendor != SSB_ANY_VENDOR, vendor); + ADD(alias, "id", coreid != SSB_ANY_ID, coreid); + ADD(alias, "rev", revision != SSB_ANY_REV, revision); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ssb", struct ssb_device_id, do_ssb_entry); +ADD_TO_DEVTABLE("ssb", ssb_device_id, do_ssb_entry); /* Looks like: bcma:mNidNrevNclN. */ static int do_bcma_entry(const char *filename, - struct bcma_device_id *id, char *alias) + void *symval, char *alias) { - id->manuf = TO_NATIVE(id->manuf); - id->id = TO_NATIVE(id->id); - id->rev = TO_NATIVE(id->rev); - id->class = TO_NATIVE(id->class); + DEF_FIELD(symval, bcma_device_id, manuf); + DEF_FIELD(symval, bcma_device_id, id); + DEF_FIELD(symval, bcma_device_id, rev); + DEF_FIELD(symval, bcma_device_id, class); strcpy(alias, "bcma:"); - ADD(alias, "m", id->manuf != BCMA_ANY_MANUF, id->manuf); - ADD(alias, "id", id->id != BCMA_ANY_ID, id->id); - ADD(alias, "rev", id->rev != BCMA_ANY_REV, id->rev); - ADD(alias, "cl", id->class != BCMA_ANY_CLASS, id->class); + ADD(alias, "m", manuf != BCMA_ANY_MANUF, manuf); + ADD(alias, "id", id != BCMA_ANY_ID, id); + ADD(alias, "rev", rev != BCMA_ANY_REV, rev); + ADD(alias, "cl", class != BCMA_ANY_CLASS, class); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("bcma", struct bcma_device_id, do_bcma_entry); +ADD_TO_DEVTABLE("bcma", bcma_device_id, do_bcma_entry); /* Looks like: virtio:dNvN */ -static int do_virtio_entry(const char *filename, struct virtio_device_id *id, +static int do_virtio_entry(const char *filename, void *symval, char *alias) { - id->device = TO_NATIVE(id->device); - id->vendor = TO_NATIVE(id->vendor); + DEF_FIELD(symval, virtio_device_id, device); + DEF_FIELD(symval, virtio_device_id, vendor); strcpy(alias, "virtio:"); - ADD(alias, "d", id->device != VIRTIO_DEV_ANY_ID, id->device); - ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor); + ADD(alias, "d", device != VIRTIO_DEV_ANY_ID, device); + ADD(alias, "v", vendor != VIRTIO_DEV_ANY_ID, vendor); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("virtio", struct virtio_device_id, do_virtio_entry); +ADD_TO_DEVTABLE("virtio", virtio_device_id, do_virtio_entry); /* * Looks like: vmbus:guid @@ -819,41 +866,44 @@ ADD_TO_DEVTABLE("virtio", struct virtio_device_id, do_virtio_entry); * in the name. */ -static int do_vmbus_entry(const char *filename, struct hv_vmbus_device_id *id, +static int do_vmbus_entry(const char *filename, void *symval, char *alias) { int i; - char guid_name[((sizeof(id->guid) + 1)) * 2]; + DEF_FIELD_ADDR(symval, hv_vmbus_device_id, guid); + char guid_name[(sizeof(*guid) + 1) * 2]; - for (i = 0; i < (sizeof(id->guid) * 2); i += 2) - sprintf(&guid_name[i], "%02x", id->guid[i/2]); + for (i = 0; i < (sizeof(*guid) * 2); i += 2) + sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2])); strcpy(alias, "vmbus:"); strcat(alias, guid_name); return 1; } -ADD_TO_DEVTABLE("vmbus", struct hv_vmbus_device_id, do_vmbus_entry); +ADD_TO_DEVTABLE("vmbus", hv_vmbus_device_id, do_vmbus_entry); /* Looks like: i2c:S */ -static int do_i2c_entry(const char *filename, struct i2c_device_id *id, +static int do_i2c_entry(const char *filename, void *symval, char *alias) { - sprintf(alias, I2C_MODULE_PREFIX "%s", id->name); + DEF_FIELD_ADDR(symval, i2c_device_id, name); + sprintf(alias, I2C_MODULE_PREFIX "%s", *name); return 1; } -ADD_TO_DEVTABLE("i2c", struct i2c_device_id, do_i2c_entry); +ADD_TO_DEVTABLE("i2c", i2c_device_id, do_i2c_entry); /* Looks like: spi:S */ -static int do_spi_entry(const char *filename, struct spi_device_id *id, +static int do_spi_entry(const char *filename, void *symval, char *alias) { - sprintf(alias, SPI_MODULE_PREFIX "%s", id->name); + DEF_FIELD_ADDR(symval, spi_device_id, name); + sprintf(alias, SPI_MODULE_PREFIX "%s", *name); return 1; } -ADD_TO_DEVTABLE("spi", struct spi_device_id, do_spi_entry); +ADD_TO_DEVTABLE("spi", spi_device_id, do_spi_entry); static const struct dmifield { const char *prefix; @@ -885,21 +935,21 @@ static void dmi_ascii_filter(char *d, const char *s) } -static int do_dmi_entry(const char *filename, struct dmi_system_id *id, +static int do_dmi_entry(const char *filename, void *symval, char *alias) { int i, j; - + DEF_FIELD_ADDR(symval, dmi_system_id, matches); sprintf(alias, "dmi*"); for (i = 0; i < ARRAY_SIZE(dmi_fields); i++) { for (j = 0; j < 4; j++) { - if (id->matches[j].slot && - id->matches[j].slot == dmi_fields[i].field) { + if ((*matches)[j].slot && + (*matches)[j].slot == dmi_fields[i].field) { sprintf(alias + strlen(alias), ":%s*", dmi_fields[i].prefix); dmi_ascii_filter(alias + strlen(alias), - id->matches[j].substr); + (*matches)[j].substr); strcat(alias, "*"); } } @@ -908,27 +958,30 @@ static int do_dmi_entry(const char *filename, struct dmi_system_id *id, strcat(alias, ":"); return 1; } -ADD_TO_DEVTABLE("dmi", struct dmi_system_id, do_dmi_entry); +ADD_TO_DEVTABLE("dmi", dmi_system_id, do_dmi_entry); static int do_platform_entry(const char *filename, - struct platform_device_id *id, char *alias) + void *symval, char *alias) { - sprintf(alias, PLATFORM_MODULE_PREFIX "%s", id->name); + DEF_FIELD_ADDR(symval, platform_device_id, name); + sprintf(alias, PLATFORM_MODULE_PREFIX "%s", *name); return 1; } -ADD_TO_DEVTABLE("platform", struct platform_device_id, do_platform_entry); +ADD_TO_DEVTABLE("platform", platform_device_id, do_platform_entry); static int do_mdio_entry(const char *filename, - struct mdio_device_id *id, char *alias) + void *symval, char *alias) { int i; + DEF_FIELD(symval, mdio_device_id, phy_id); + DEF_FIELD(symval, mdio_device_id, phy_id_mask); alias += sprintf(alias, MDIO_MODULE_PREFIX); for (i = 0; i < 32; i++) { - if (!((id->phy_id_mask >> (31-i)) & 1)) + if (!((phy_id_mask >> (31-i)) & 1)) *(alias++) = '?'; - else if ((id->phy_id >> (31-i)) & 1) + else if ((phy_id >> (31-i)) & 1) *(alias++) = '1'; else *(alias++) = '0'; @@ -939,47 +992,50 @@ static int do_mdio_entry(const char *filename, return 1; } -ADD_TO_DEVTABLE("mdio", struct mdio_device_id, do_mdio_entry); +ADD_TO_DEVTABLE("mdio", mdio_device_id, do_mdio_entry); /* Looks like: zorro:iN. */ -static int do_zorro_entry(const char *filename, struct zorro_device_id *id, +static int do_zorro_entry(const char *filename, void *symval, char *alias) { - id->id = TO_NATIVE(id->id); + DEF_FIELD(symval, zorro_device_id, id); strcpy(alias, "zorro:"); - ADD(alias, "i", id->id != ZORRO_WILDCARD, id->id); + ADD(alias, "i", id != ZORRO_WILDCARD, id); return 1; } -ADD_TO_DEVTABLE("zorro", struct zorro_device_id, do_zorro_entry); +ADD_TO_DEVTABLE("zorro", zorro_device_id, do_zorro_entry); /* looks like: "pnp:dD" */ static int do_isapnp_entry(const char *filename, - struct isapnp_device_id *id, char *alias) + void *symval, char *alias) { + DEF_FIELD(symval, isapnp_device_id, vendor); + DEF_FIELD(symval, isapnp_device_id, function); sprintf(alias, "pnp:d%c%c%c%x%x%x%x*", - 'A' + ((id->vendor >> 2) & 0x3f) - 1, - 'A' + (((id->vendor & 3) << 3) | ((id->vendor >> 13) & 7)) - 1, - 'A' + ((id->vendor >> 8) & 0x1f) - 1, - (id->function >> 4) & 0x0f, id->function & 0x0f, - (id->function >> 12) & 0x0f, (id->function >> 8) & 0x0f); + 'A' + ((vendor >> 2) & 0x3f) - 1, + 'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1, + 'A' + ((vendor >> 8) & 0x1f) - 1, + (function >> 4) & 0x0f, function & 0x0f, + (function >> 12) & 0x0f, (function >> 8) & 0x0f); return 1; } -ADD_TO_DEVTABLE("isapnp", struct isapnp_device_id, do_isapnp_entry); +ADD_TO_DEVTABLE("isapnp", isapnp_device_id, do_isapnp_entry); /* Looks like: "ipack:fNvNdN". */ static int do_ipack_entry(const char *filename, - struct ipack_device_id *id, char *alias) + void *symval, char *alias) { - id->vendor = TO_NATIVE(id->vendor); - id->device = TO_NATIVE(id->device); + DEF_FIELD(symval, ipack_device_id, format); + DEF_FIELD(symval, ipack_device_id, vendor); + DEF_FIELD(symval, ipack_device_id, device); strcpy(alias, "ipack:"); - ADD(alias, "f", id->format != IPACK_ANY_FORMAT, id->format); - ADD(alias, "v", id->vendor != IPACK_ANY_ID, id->vendor); - ADD(alias, "d", id->device != IPACK_ANY_ID, id->device); + ADD(alias, "f", format != IPACK_ANY_FORMAT, format); + ADD(alias, "v", vendor != IPACK_ANY_ID, vendor); + ADD(alias, "d", device != IPACK_ANY_ID, device); add_wildcard(alias); return 1; } -ADD_TO_DEVTABLE("ipack", struct ipack_device_id, do_ipack_entry); +ADD_TO_DEVTABLE("ipack", ipack_device_id, do_ipack_entry); /* * Append a match expression for a single masked hex digit. @@ -1030,25 +1086,27 @@ static void append_nibble_mask(char **outp, * a ? or [] pattern matching exactly one digit. */ static int do_amba_entry(const char *filename, - struct amba_id *id, char *alias) + void *symval, char *alias) { unsigned int digit; char *p = alias; + DEF_FIELD(symval, amba_id, id); + DEF_FIELD(symval, amba_id, mask); - if ((id->id & id->mask) != id->id) + if ((id & mask) != id) fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: " "id=0x%08X, mask=0x%08X. Please fix this driver.\n", - filename, id->id, id->mask); + filename, id, mask); p += sprintf(alias, "amba:d"); for (digit = 0; digit < 8; digit++) append_nibble_mask(&p, - (id->id >> (4 * (7 - digit))) & 0xf, - (id->mask >> (4 * (7 - digit))) & 0xf); + (id >> (4 * (7 - digit))) & 0xf, + (mask >> (4 * (7 - digit))) & 0xf); return 1; } -ADD_TO_DEVTABLE("amba", struct amba_id, do_amba_entry); +ADD_TO_DEVTABLE("amba", amba_id, do_amba_entry); /* LOOKS like x86cpu:vendor:VVVV:family:FFFF:model:MMMM:feature:*,FEAT,* * All fields are numbers. It would be nicer to use strings for vendor @@ -1056,24 +1114,24 @@ ADD_TO_DEVTABLE("amba", struct amba_id, do_amba_entry); * complicated. */ -static int do_x86cpu_entry(const char *filename, struct x86_cpu_id *id, +static int do_x86cpu_entry(const char *filename, void *symval, char *alias) { - id->feature = TO_NATIVE(id->feature); - id->family = TO_NATIVE(id->family); - id->model = TO_NATIVE(id->model); - id->vendor = TO_NATIVE(id->vendor); + DEF_FIELD(symval, x86_cpu_id, feature); + DEF_FIELD(symval, x86_cpu_id, family); + DEF_FIELD(symval, x86_cpu_id, model); + DEF_FIELD(symval, x86_cpu_id, vendor); strcpy(alias, "x86cpu:"); - ADD(alias, "vendor:", id->vendor != X86_VENDOR_ANY, id->vendor); - ADD(alias, ":family:", id->family != X86_FAMILY_ANY, id->family); - ADD(alias, ":model:", id->model != X86_MODEL_ANY, id->model); + ADD(alias, "vendor:", vendor != X86_VENDOR_ANY, vendor); + ADD(alias, ":family:", family != X86_FAMILY_ANY, family); + ADD(alias, ":model:", model != X86_MODEL_ANY, model); strcat(alias, ":feature:*"); - if (id->feature != X86_FEATURE_ANY) - sprintf(alias + strlen(alias), "%04X*", id->feature); + if (feature != X86_FEATURE_ANY) + sprintf(alias + strlen(alias), "%04X*", feature); return 1; } -ADD_TO_DEVTABLE("x86cpu", struct x86_cpu_id, do_x86cpu_entry); +ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 1c6fbb1a4f8e..78b30c1548e9 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2128,7 +2128,7 @@ int main(int argc, char **argv) struct ext_sym_list *extsym_iter; struct ext_sym_list *extsym_start = NULL; - while ((opt = getopt(argc, argv, "i:I:e:cmsSo:awM:K:")) != -1) { + while ((opt = getopt(argc, argv, "i:I:e:msSo:awM:K:")) != -1) { switch (opt) { case 'i': kernel_read = optarg; @@ -2137,9 +2137,6 @@ int main(int argc, char **argv) module_read = optarg; external_module = 1; break; - case 'c': - cross_build = 1; - break; case 'e': external_module = 1; extsym_iter = diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 4bf17ddf7c7f..fbbfd08853d3 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -95,7 +95,7 @@ echo 'cp $KBUILD_IMAGE $RPM_BUILD_ROOT'"/boot/vmlinuz-$KERNELRELEASE" echo "%endif" echo "%endif" -echo 'make %{?_smp_mflags} INSTALL_HDR_PATH=$RPM_BUILD_ROOT/usr headers_install' +echo 'make %{?_smp_mflags} INSTALL_HDR_PATH=$RPM_BUILD_ROOT/usr KBUILD_SRC= headers_install' echo 'cp System.map $RPM_BUILD_ROOT'"/boot/System.map-$KERNELRELEASE" echo 'cp .config $RPM_BUILD_ROOT'"/boot/config-$KERNELRELEASE" diff --git a/scripts/setlocalversion b/scripts/setlocalversion index bd6dca8a0ab2..84b88f109b80 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -108,7 +108,7 @@ scm_version() fi # Check for svn and a svn repo. - if rev=`svn info 2>/dev/null | grep '^Last Changed Rev'`; then + if rev=`LANG= LC_ALL= LC_MESSAGES=C svn info 2>/dev/null | grep '^Last Changed Rev'`; then rev=`echo $rev | awk '{print $NF}'` printf -- '-svn%s' "$rev" diff --git a/scripts/tags.sh b/scripts/tags.sh index 65f9595acea9..26a87e68afed 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -217,34 +217,34 @@ exuberant() emacs() { all_target_sources | xargs $1 -a \ - --regex='/^(ENTRY|_GLOBAL)(\([^)]*\)).*/\2/' \ + --regex='/^\(ENTRY\|_GLOBAL\)(\([^)]*\)).*/\2/' \ --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \ --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \ --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' \ - --regex='/PAGEFLAG\(([^,)]*).*/Page\1/' \ - --regex='/PAGEFLAG\(([^,)]*).*/SetPage\1/' \ - --regex='/PAGEFLAG\(([^,)]*).*/ClearPage\1/' \ - --regex='/TESTSETFLAG\(([^,)]*).*/TestSetPage\1/' \ - --regex='/TESTPAGEFLAG\(([^,)]*).*/Page\1/' \ - --regex='/SETPAGEFLAG\(([^,)]*).*/SetPage\1/' \ - --regex='/__SETPAGEFLAG\(([^,)]*).*/__SetPage\1/' \ - --regex='/TESTCLEARFLAG\(([^,)]*).*/TestClearPage\1/' \ - --regex='/__TESTCLEARFLAG\(([^,)]*).*/TestClearPage\1/' \ - --regex='/CLEARPAGEFLAG\(([^,)]*).*/ClearPage\1/' \ - --regex='/__CLEARPAGEFLAG\(([^,)]*).*/__ClearPage\1/' \ - --regex='/__PAGEFLAG\(([^,)]*).*/__SetPage\1/' \ - --regex='/__PAGEFLAG\(([^,)]*).*/__ClearPage\1/' \ - --regex='/PAGEFLAG_FALSE\(([^,)]*).*/Page\1/' \ - --regex='/TESTSCFLAG\(([^,)]*).*/TestSetPage\1/' \ - --regex='/TESTSCFLAG\(([^,)]*).*/TestClearPage\1/' \ - --regex='/SETPAGEFLAG_NOOP\(([^,)]*).*/SetPage\1/' \ - --regex='/CLEARPAGEFLAG_NOOP\(([^,)]*).*/ClearPage\1/' \ - --regex='/__CLEARPAGEFLAG_NOOP\(([^,)]*).*/__ClearPage\1/' \ - --regex='/TESTCLEARFLAG_FALSE\(([^,)]*).*/TestClearPage\1/' \ - --regex='/__TESTCLEARFLAG_FALSE\(([^,)]*).*/__TestClearPage\1/' \ - --regex='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' \ - --regex='/PCI_OP_READ\(([a-z]*[a-z]).*[1-4]\)/pci_bus_read_config_\1/' \ - --regex='/PCI_OP_WRITE\(([a-z]*[a-z]).*[1-4]\)/pci_bus_write_config_\1/' + --regex='/PAGEFLAG(\([^,)]*\).*/Page\1/' \ + --regex='/PAGEFLAG(\([^,)]*\).*/SetPage\1/' \ + --regex='/PAGEFLAG(\([^,)]*\).*/ClearPage\1/' \ + --regex='/TESTSETFLAG(\([^,)]*\).*/TestSetPage\1/' \ + --regex='/TESTPAGEFLAG(\([^,)]*\).*/Page\1/' \ + --regex='/SETPAGEFLAG(\([^,)]*\).*/SetPage\1/' \ + --regex='/__SETPAGEFLAG(\([^,)]*\).*/__SetPage\1/' \ + --regex='/TESTCLEARFLAG(\([^,)]*\).*/TestClearPage\1/' \ + --regex='/__TESTCLEARFLAG(\([^,)]*\).*/TestClearPage\1/' \ + --regex='/CLEARPAGEFLAG(\([^,)]*\).*/ClearPage\1/' \ + --regex='/__CLEARPAGEFLAG(\([^,)]*\).*/__ClearPage\1/' \ + --regex='/__PAGEFLAG(\([^,)]*\).*/__SetPage\1/' \ + --regex='/__PAGEFLAG(\([^,)]*\).*/__ClearPage\1/' \ + --regex='/PAGEFLAG_FALSE(\([^,)]*\).*/Page\1/' \ + --regex='/TESTSCFLAG(\([^,)]*\).*/TestSetPage\1/' \ + --regex='/TESTSCFLAG(\([^,)]*\).*/TestClearPage\1/' \ + --regex='/SETPAGEFLAG_NOOP(\([^,)]*\).*/SetPage\1/' \ + --regex='/CLEARPAGEFLAG_NOOP(\([^,)]*\).*/ClearPage\1/' \ + --regex='/__CLEARPAGEFLAG_NOOP(\([^,)]*\).*/__ClearPage\1/' \ + --regex='/TESTCLEARFLAG_FALSE(\([^,)]*\).*/TestClearPage\1/' \ + --regex='/__TESTCLEARFLAG_FALSE(\([^,)]*\).*/__TestClearPage\1/' \ + --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ + --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ + --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c index 55a6271bce7a..ff63fe00c195 100644 --- a/security/integrity/ima/ima_queue.c +++ b/security/integrity/ima/ima_queue.c @@ -45,12 +45,11 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value) { struct ima_queue_entry *qe, *ret = NULL; unsigned int key; - struct hlist_node *pos; int rc; key = ima_hash_key(digest_value); rcu_read_lock(); - hlist_for_each_entry_rcu(qe, pos, &ima_htable.queue[key], hnext) { + hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) { rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE); if (rc == 0) { ret = qe; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 4d3fab47e643..dad36a6ab45f 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -188,11 +188,9 @@ int avc_get_hash_stats(char *page) for (i = 0; i < AVC_CACHE_SLOTS; i++) { head = &avc_cache.slots[i]; if (!hlist_empty(head)) { - struct hlist_node *next; - slots_used++; chain_len = 0; - hlist_for_each_entry_rcu(node, next, head, list) + hlist_for_each_entry_rcu(node, head, list) chain_len++; if (chain_len > max_chain_len) max_chain_len = chain_len; @@ -241,7 +239,6 @@ static inline int avc_reclaim_node(void) int hvalue, try, ecx; unsigned long flags; struct hlist_head *head; - struct hlist_node *next; spinlock_t *lock; for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) { @@ -253,7 +250,7 @@ static inline int avc_reclaim_node(void) continue; rcu_read_lock(); - hlist_for_each_entry(node, next, head, list) { + hlist_for_each_entry(node, head, list) { avc_node_delete(node); avc_cache_stats_incr(reclaims); ecx++; @@ -301,11 +298,10 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) struct avc_node *node, *ret = NULL; int hvalue; struct hlist_head *head; - struct hlist_node *next; hvalue = avc_hash(ssid, tsid, tclass); head = &avc_cache.slots[hvalue]; - hlist_for_each_entry_rcu(node, next, head, list) { + hlist_for_each_entry_rcu(node, head, list) { if (ssid == node->ae.ssid && tclass == node->ae.tclass && tsid == node->ae.tsid) { @@ -394,7 +390,6 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec node = avc_alloc_node(); if (node) { struct hlist_head *head; - struct hlist_node *next; spinlock_t *lock; hvalue = avc_hash(ssid, tsid, tclass); @@ -404,7 +399,7 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec lock = &avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); - hlist_for_each_entry(pos, next, head, list) { + hlist_for_each_entry(pos, head, list) { if (pos->ae.ssid == ssid && pos->ae.tsid == tsid && pos->ae.tclass == tclass) { @@ -541,7 +536,6 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, unsigned long flag; struct avc_node *pos, *node, *orig = NULL; struct hlist_head *head; - struct hlist_node *next; spinlock_t *lock; node = avc_alloc_node(); @@ -558,7 +552,7 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, spin_lock_irqsave(lock, flag); - hlist_for_each_entry(pos, next, head, list) { + hlist_for_each_entry(pos, head, list) { if (ssid == pos->ae.ssid && tsid == pos->ae.tsid && tclass == pos->ae.tclass && @@ -614,7 +608,6 @@ out: static void avc_flush(void) { struct hlist_head *head; - struct hlist_node *next; struct avc_node *node; spinlock_t *lock; unsigned long flag; @@ -630,7 +623,7 @@ static void avc_flush(void) * prevent RCU grace periods from ending. */ rcu_read_lock(); - hlist_for_each_entry(node, next, head, list) + hlist_for_each_entry(node, head, list) avc_node_delete(node); rcu_read_unlock(); spin_unlock_irqrestore(lock, flag); diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index cdd100dae855..9febe5509748 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -836,6 +836,8 @@ static struct { {0x7063, 0x2000}, /* pcHDTV HD-2000 TV */ }; +static struct pci_driver driver; + /* return the id of the card, or a negative value if it's blacklisted */ static int snd_bt87x_detect_card(struct pci_dev *pci) { @@ -962,11 +964,24 @@ static DEFINE_PCI_DEVICE_TABLE(snd_bt87x_default_ids) = { { } }; -static struct pci_driver bt87x_driver = { +static struct pci_driver driver = { .name = KBUILD_MODNAME, .id_table = snd_bt87x_ids, .probe = snd_bt87x_probe, .remove = snd_bt87x_remove, }; -module_pci_driver(bt87x_driver); +static int __init alsa_card_bt87x_init(void) +{ + if (load_all) + driver.id_table = snd_bt87x_default_ids; + return pci_register_driver(&driver); +} + +static void __exit alsa_card_bt87x_exit(void) +{ + pci_unregister_driver(&driver); +} + +module_init(alsa_card_bt87x_init) +module_exit(alsa_card_bt87x_exit) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index a7c296a36a17..e6b016693240 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -862,6 +862,12 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) filename, emu->firmware->size); } + err = snd_emu1010_load_firmware(emu); + if (err != 0) { + snd_printk(KERN_INFO "emu1010: Loading Firmware failed\n"); + return err; + } + /* ID, should read & 0x7f = 0x55 when FPGA programmed. */ snd_emu1010_fpga_read(emu, EMU_HANA_ID, ®); if ((reg & 0x3f) != 0x15) { diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 748a286277eb..5ae1d045bdcb 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1127,7 +1127,7 @@ static int snd_emu10k1_playback_open(struct snd_pcm_substream *substream) struct snd_emu10k1_pcm *epcm; struct snd_emu10k1_pcm_mixer *mix; struct snd_pcm_runtime *runtime = substream->runtime; - int i, err; + int i, err, sample_rate; epcm = kzalloc(sizeof(*epcm), GFP_KERNEL); if (epcm == NULL) @@ -1146,7 +1146,11 @@ static int snd_emu10k1_playback_open(struct snd_pcm_substream *substream) kfree(epcm); return err; } - err = snd_pcm_hw_rule_noresample(runtime, 48000); + if (emu->card_capabilities->emu_model && emu->emu1010.internal_clock == 0) + sample_rate = 44100; + else + sample_rate = 48000; + err = snd_pcm_hw_rule_noresample(runtime, sample_rate); if (err < 0) { kfree(epcm); return err; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 21425fb51fe0..78e1827d0a95 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1640,6 +1640,9 @@ static int generic_hdmi_build_jack(struct hda_codec *codec, int pin_idx) if (pcmdev > 0) sprintf(hdmi_str + strlen(hdmi_str), ",pcm=%d", pcmdev); + if (!is_jack_detectable(codec, per_pin->pin_nid)) + strncat(hdmi_str, " Phantom", + sizeof(hdmi_str) - strlen(hdmi_str) - 1); return snd_hda_jack_add_kctl(codec, per_pin->pin_nid, hdmi_str, 0); } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 61478fd82565..2d4237bc0d8e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -928,6 +928,7 @@ static int alc_codec_rename_from_preset(struct hda_codec *codec) static const struct snd_pci_quirk beep_white_list[] = { SND_PCI_QUIRK(0x1043, 0x103c, "ASUS", 1), SND_PCI_QUIRK(0x1043, 0x829f, "ASUS", 1), + SND_PCI_QUIRK(0x1043, 0x8376, "EeePC", 1), SND_PCI_QUIRK(0x1043, 0x83ce, "EeePC", 1), SND_PCI_QUIRK(0x1043, 0x831a, "EeePC", 1), SND_PCI_QUIRK(0x1043, 0x834a, "EeePC", 1), diff --git a/sound/pci/ice1712/revo.c b/sound/pci/ice1712/revo.c index 7641080a9b5d..1112ec1953be 100644 --- a/sound/pci/ice1712/revo.c +++ b/sound/pci/ice1712/revo.c @@ -35,6 +35,7 @@ struct revo51_spec { struct snd_i2c_device *dev; struct snd_pt2258 *pt2258; + struct ak4114 *ak4114; }; static void revo_i2s_mclk_changed(struct snd_ice1712 *ice) @@ -359,9 +360,9 @@ static struct snd_ak4xxx_private akm_ap192_priv = { .cif = 0, .data_mask = VT1724_REVO_CDOUT, .clk_mask = VT1724_REVO_CCLK, - .cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS1, - .cs_addr = VT1724_REVO_CS1, - .cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS1, + .cs_mask = VT1724_REVO_CS0 | VT1724_REVO_CS3, + .cs_addr = VT1724_REVO_CS3, + .cs_none = VT1724_REVO_CS0 | VT1724_REVO_CS3, .add_flags = VT1724_REVO_CCLK, /* high at init */ .mask_flags = 0, }; @@ -372,7 +373,7 @@ static struct snd_ak4xxx_private akm_ap192_priv = { * CCLK (pin 34) -- GPIO1 pin 51 (shared with AK4358) * CSN (pin 35) -- GPIO7 pin 59 */ -#define AK4114_ADDR 0x02 +#define AK4114_ADDR 0x00 static void write_data(struct snd_ice1712 *ice, unsigned int gpio, unsigned int data, int idx) @@ -426,7 +427,7 @@ static unsigned int ap192_4wire_start(struct snd_ice1712 *ice) tmp = snd_ice1712_gpio_read(ice); tmp |= VT1724_REVO_CCLK; /* high at init */ tmp |= VT1724_REVO_CS0; - tmp &= ~VT1724_REVO_CS1; + tmp &= ~VT1724_REVO_CS3; snd_ice1712_gpio_write(ice, tmp); udelay(1); return tmp; @@ -434,7 +435,7 @@ static unsigned int ap192_4wire_start(struct snd_ice1712 *ice) static void ap192_4wire_finish(struct snd_ice1712 *ice, unsigned int tmp) { - tmp |= VT1724_REVO_CS1; + tmp |= VT1724_REVO_CS3; tmp |= VT1724_REVO_CS0; snd_ice1712_gpio_write(ice, tmp); udelay(1); @@ -470,27 +471,32 @@ static unsigned char ap192_ak4114_read(void *private_data, unsigned char addr) static int ap192_ak4114_init(struct snd_ice1712 *ice) { static const unsigned char ak4114_init_vals[] = { - AK4114_RST | AK4114_PWN | AK4114_OCKS0 | AK4114_OCKS1, + AK4114_RST | AK4114_PWN | AK4114_OCKS0, AK4114_DIF_I24I2S, AK4114_TX1E, - AK4114_EFH_1024 | AK4114_DIT | AK4114_IPS(1), + AK4114_EFH_1024 | AK4114_DIT | AK4114_IPS(0), 0, 0 }; static const unsigned char ak4114_init_txcsb[] = { 0x41, 0x02, 0x2c, 0x00, 0x00 }; - struct ak4114 *ak; int err; + struct revo51_spec *spec; + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + ice->spec = spec; + err = snd_ak4114_create(ice->card, ap192_ak4114_read, ap192_ak4114_write, ak4114_init_vals, ak4114_init_txcsb, - ice, &ak); + ice, &spec->ak4114); /* AK4114 in Revo cannot detect external rate correctly. * No reason to stop capture stream due to incorrect checks */ - ak->check_flags = AK4114_CHECK_NO_RATE; + spec->ak4114->check_flags = AK4114_CHECK_NO_RATE; return 0; /* error ignored; it's no fatal error */ } @@ -562,6 +568,9 @@ static int revo_init(struct snd_ice1712 *ice) ice); if (err < 0) return err; + err = ap192_ak4114_init(ice); + if (err < 0) + return err; /* unmute all codecs */ snd_ice1712_gpio_write_bits(ice, VT1724_REVO_MUTE, @@ -575,7 +584,7 @@ static int revo_init(struct snd_ice1712 *ice) static int revo_add_controls(struct snd_ice1712 *ice) { - struct revo51_spec *spec; + struct revo51_spec *spec = ice->spec; int err; switch (ice->eeprom.subvendor) { @@ -597,7 +606,9 @@ static int revo_add_controls(struct snd_ice1712 *ice) err = snd_ice1712_akm4xxx_build_controls(ice); if (err < 0) return err; - err = ap192_ak4114_init(ice); + /* only capture SPDIF over AK4114 */ + err = snd_ak4114_build(spec->ak4114, NULL, + ice->pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream); if (err < 0) return err; break; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bc4ad7977438..c8be0fbc5145 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -314,7 +314,6 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) { struct hlist_head *head; - struct hlist_node *pos; struct perf_sample_id *sid; int hash; @@ -324,7 +323,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; - hlist_for_each_entry(sid, pos, head, node) + hlist_for_each_entry(sid, head, node) if (sid->id == id) return sid->evsel; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 85baf11e2acd..3cc0ad7ae863 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,10 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug +TARGETS = breakpoints +TARGETS += kcmp +TARGETS += mqueue +TARGETS += vm +TARGETS += cpu-hotplug +TARGETS += memory-hotplug +TARGETS += efivarfs all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/README.txt b/tools/testing/selftests/README.txt new file mode 100644 index 000000000000..5e2faf9c55d3 --- /dev/null +++ b/tools/testing/selftests/README.txt @@ -0,0 +1,42 @@ +Linux Kernel Selftests + +The kernel contains a set of "self tests" under the tools/testing/selftests/ +directory. These are intended to be small unit tests to exercise individual +code paths in the kernel. + +Running the selftests +===================== + +To build the tests: + + $ make -C tools/testing/selftests + + +To run the tests: + + $ make -C tools/testing/selftests run_tests + +- note that some tests will require root privileges. + + +To run only tests targetted for a single subsystem: + + $ make -C tools/testing/selftests TARGETS=cpu-hotplug run_tests + +See the top-level tools/testing/selftests/Makefile for the list of all possible +targets. + + +Contributing new tests +====================== + +In general, the rules for for selftests are + + * Do as much as you can if you're not root; + + * Don't take too long; + + * Don't break the build on any architecture, and + + * Don't cause the top-level "make run_tests" to fail if your feature is + unconfigured. diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile new file mode 100644 index 000000000000..29e8c6bc81b0 --- /dev/null +++ b/tools/testing/selftests/efivarfs/Makefile @@ -0,0 +1,12 @@ +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall + +test_objs = open-unlink create-read + +all: $(test_objs) + +run_tests: all + @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]" + +clean: + rm -f $(test_objs) diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c new file mode 100644 index 000000000000..7feef1880968 --- /dev/null +++ b/tools/testing/selftests/efivarfs/create-read.c @@ -0,0 +1,38 @@ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> + +int main(int argc, char **argv) +{ + const char *path; + char buf[4]; + int fd, rc; + + if (argc < 2) { + fprintf(stderr, "usage: %s <path>\n", argv[0]); + return EXIT_FAILURE; + } + + path = argv[1]; + + /* create a test variable */ + fd = open(path, O_RDWR | O_CREAT, 0600); + if (fd < 0) { + perror("open(O_WRONLY)"); + return EXIT_FAILURE; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc != 0) { + fprintf(stderr, "Reading a new var should return EOF\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh new file mode 100644 index 000000000000..880cdd5dc63f --- /dev/null +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -0,0 +1,139 @@ +#!/bin/bash + +efivarfs_mount=/sys/firmware/efi/efivars +test_guid=210be57c-9849-4fc7-a635-e6382d1aec27 + +check_prereqs() +{ + local msg="skip all tests:" + + if [ $UID != 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi + + if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then + echo $msg efivarfs is not mounted on $efivarfs_mount >&2 + exit 0 + fi +} + +run_test() +{ + local test="$1" + + echo "--------------------" + echo "running $test" + echo "--------------------" + + if [ "$(type -t $test)" = 'function' ]; then + ( $test ) + else + ( ./$test ) + fi + + if [ $? -ne 0 ]; then + echo " [FAIL]" + rc=1 + else + echo " [PASS]" + fi +} + +test_create() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file couldn't be created" >&2 + exit 1 + fi + + if [ $(stat -c %s $file) -ne 5 ]; then + echo "$file has invalid size" >&2 + exit 1 + fi +} + +test_create_empty() +{ + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + : > $file + + if [ ! -e $file ]; then + echo "$file can not be created without writing" >&2 + exit 1 + fi +} + +test_create_read() +{ + local file=$efivarfs_mount/$FUNCNAME-$test_guid + ./create-read $file +} + +test_delete() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file couldn't be created" >&2 + exit 1 + fi + + rm $file + + if [ -e $file ]; then + echo "$file couldn't be deleted" >&2 + exit 1 + fi + +} + +# test that we can remove a variable by issuing a write with only +# attributes specified +test_zero_size_delete() +{ + local attrs='\x07\x00\x00\x00' + local file=$efivarfs_mount/$FUNCNAME-$test_guid + + printf "$attrs\x00" > $file + + if [ ! -e $file ]; then + echo "$file does not exist" >&2 + exit 1 + fi + + printf "$attrs" > $file + + if [ -e $file ]; then + echo "$file should have been deleted" >&2 + exit 1 + fi +} + +test_open_unlink() +{ + local file=$efivarfs_mount/$FUNCNAME-$test_guid + ./open-unlink $file +} + +check_prereqs + +rc=0 + +run_test test_create +run_test test_create_empty +run_test test_create_read +run_test test_delete +run_test test_zero_size_delete +run_test test_open_unlink + +exit $rc diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c new file mode 100644 index 000000000000..8c0764407b3c --- /dev/null +++ b/tools/testing/selftests/efivarfs/open-unlink.c @@ -0,0 +1,63 @@ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +int main(int argc, char **argv) +{ + const char *path; + char buf[5]; + int fd, rc; + + if (argc < 2) { + fprintf(stderr, "usage: %s <path>\n", argv[0]); + return EXIT_FAILURE; + } + + path = argv[1]; + + /* attributes: EFI_VARIABLE_NON_VOLATILE | + * EFI_VARIABLE_BOOTSERVICE_ACCESS | + * EFI_VARIABLE_RUNTIME_ACCESS + */ + *(uint32_t *)buf = 0x7; + buf[4] = 0; + + /* create a test variable */ + fd = open(path, O_WRONLY | O_CREAT); + if (fd < 0) { + perror("open(O_WRONLY)"); + return EXIT_FAILURE; + } + + rc = write(fd, buf, sizeof(buf)); + if (rc != sizeof(buf)) { + perror("write"); + return EXIT_FAILURE; + } + + close(fd); + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror("open"); + return EXIT_FAILURE; + } + + if (unlink(path) < 0) { + perror("unlink"); + return EXIT_FAILURE; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc > 0) { + fprintf(stderr, "reading from an unlinked variable " + "shouldn't be possible\n"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b6eea5cc7b34..adb17f266b28 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -268,14 +268,13 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; - struct hlist_node *n; if (irqfd->gsi >= irq_rt->nr_rt_entries) { rcu_assign_pointer(irqfd->irq_entry, NULL); return; } - hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) { + hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) rcu_assign_pointer(irqfd->irq_entry, e); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ff6d40e2c06d..e9073cf4d040 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -173,7 +173,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i = 0; struct kvm_irq_routing_table *irq_rt; - struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -184,7 +183,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) + hlist_for_each_entry(e, &irq_rt->map[irq], link) irq_set[i++] = *e; rcu_read_unlock(); @@ -212,7 +211,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; - struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -227,7 +225,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { + hlist_for_each_entry(e, &irq_rt->map[irq], link) { if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); else @@ -241,13 +239,12 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - struct hlist_node *n; int gsi; rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) { rcu_read_unlock(); @@ -263,7 +260,6 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - struct hlist_node *n; int gsi; trace_kvm_ack_irq(irqchip, pin); @@ -271,7 +267,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); @@ -369,13 +365,12 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask) { struct kvm_irq_mask_notifier *kimn; - struct hlist_node *n; int gsi; rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; if (gsi != -1) - hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) + hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); rcu_read_unlock(); @@ -396,13 +391,12 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, int delta; unsigned max_pin; struct kvm_kernel_irq_routing_entry *ei; - struct hlist_node *n; /* * Do not allow GSI to be mapped to the same irqchip more than once. * Allow only one to one mapping between GSI and MSI. */ - hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) if (ei->type == KVM_IRQ_ROUTING_MSI || ue->type == KVM_IRQ_ROUTING_MSI || ue->u.irqchip.irqchip == ei->irqchip.irqchip) |