diff options
183 files changed, 4154 insertions, 2475 deletions
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index 6b5c42dbbe84..2c656ae43ba7 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -4,7 +4,8 @@ dm-crypt Device-Mapper's "crypt" target provides transparent encryption of block devices using the kernel crypto API. -Parameters: <cipher> <key> <iv_offset> <device path> <offset> +Parameters: <cipher> <key> <iv_offset> <device path> \ + <offset> [<#opt_params> <opt_params>] <cipher> Encryption cipher and an optional IV generation mode. @@ -37,6 +38,24 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset> <offset> Starting sector within the device where the encrypted data begins. +<#opt_params> + Number of optional parameters. If there are no optional parameters, + the optional paramaters section can be skipped or #opt_params can be zero. + Otherwise #opt_params is the number of following arguments. + + Example of optional parameters section: + 1 allow_discards + +allow_discards + Block discard requests (a.k.a. TRIM) are passed through the crypt device. + The default is to ignore discard requests. + + WARNING: Assess the specific security risks carefully before enabling this + option. For example, allowing discards on encrypted devices may lead to + the leak of information about the ciphertext device (filesystem type, + used space etc.) if the discarded blocks can be located easily on the + device later. + Example scripts =============== LUKS (Linux Unified Key Setup) is now the preferred way to set up disk diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt index c8efdfd19a65..6ff5c2327227 100644 --- a/Documentation/device-mapper/dm-flakey.txt +++ b/Documentation/device-mapper/dm-flakey.txt @@ -1,17 +1,53 @@ dm-flakey ========= -This target is the same as the linear target except that it returns I/O -errors periodically. It's been found useful in simulating failing -devices for testing purposes. +This target is the same as the linear target except that it exhibits +unreliable behaviour periodically. It's been found useful in simulating +failing devices for testing purposes. Starting from the time the table is loaded, the device is available for -<up interval> seconds, then returns errors for <down interval> seconds, -and then this cycle repeats. +<up interval> seconds, then exhibits unreliable behaviour for <down +interval> seconds, and then this cycle repeats. -Parameters: <dev path> <offset> <up interval> <down interval> +Also, consider using this in combination with the dm-delay target too, +which can delay reads and writes and/or send them to different +underlying devices. + +Table parameters +---------------- + <dev path> <offset> <up interval> <down interval> \ + [<num_features> [<feature arguments>]] + +Mandatory parameters: <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. <offset>: Starting sector within the device. <up interval>: Number of seconds device is available. <down interval>: Number of seconds device returns errors. + +Optional feature parameters: + If no feature parameters are present, during the periods of + unreliability, all I/O returns errors. + + drop_writes: + All write I/O is silently ignored. + Read I/O is handled correctly. + + corrupt_bio_byte <Nth_byte> <direction> <value> <flags>: + During <down interval>, replace <Nth_byte> of the data of + each matching bio with <value>. + + <Nth_byte>: The offset of the byte to replace. + Counting starts at 1, to replace the first byte. + <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes. + 'w' is incompatible with drop_writes. + <value>: The value (from 0-255) to write. + <flags>: Perform the replacement only if bio->bi_rw has all the + selected flags set. + +Examples: + corrupt_bio_byte 32 r 1 0 + - replaces the 32nd byte of READ bios with the value 1 + + corrupt_bio_byte 224 w 0 32 + - replaces the 224th byte of REQ_META (=32) bios with the value 0 diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 33b6b7071ac8..2a8c11331d2d 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -1,70 +1,108 @@ -Device-mapper RAID (dm-raid) is a bridge from DM to MD. It -provides a way to use device-mapper interfaces to access the MD RAID -drivers. +dm-raid +------- -As with all device-mapper targets, the nominal public interfaces are the -constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO -and STATUSTYPE_TABLE). The CTR table looks like the following: +The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. +It allows the MD RAID drivers to be accessed using a device-mapper +interface. -1: <s> <l> raid \ -2: <raid_type> <#raid_params> <raid_params> \ -3: <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN> - -Line 1 contains the standard first three arguments to any device-mapper -target - the start, length, and target type fields. The target type in -this case is "raid". - -Line 2 contains the arguments that define the particular raid -type/personality/level, the required arguments for that raid type, and -any optional arguments. Possible raid types include: raid4, raid5_la, -raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is -planned for the future.) The list of required and optional parameters -is the same for all the current raid types. The required parameters are -positional, while the optional parameters are given as key/value pairs. -The possible parameters are as follows: - <chunk_size> Chunk size in sectors. - [[no]sync] Force/Prevent RAID initialization - [rebuild <idx>] Rebuild the drive indicated by the index - [daemon_sleep <ms>] Time between bitmap daemon work to clear bits - [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization - [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization - [max_write_behind <sectors>] See '-write-behind=' (man mdadm) - [stripe_cache <sectors>] Stripe cache size for higher RAIDs - -Line 3 contains the list of devices that compose the array in -metadata/data device pairs. If the metadata is stored separately, a '-' -is given for the metadata device position. If a drive has failed or is -missing at creation time, a '-' can be given for both the metadata and -data drives for a given position. - -NB. Currently all metadata devices must be specified as '-'. - -Examples: -# RAID4 - 4 data drives, 1 parity +The target is named "raid" and it accepts the following parameters: + + <raid_type> <#raid_params> <raid_params> \ + <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>] + +<raid_type>: + raid1 RAID1 mirroring + raid4 RAID4 dedicated parity disk + raid5_la RAID5 left asymmetric + - rotating parity 0 with data continuation + raid5_ra RAID5 right asymmetric + - rotating parity N with data continuation + raid5_ls RAID5 left symmetric + - rotating parity 0 with data restart + raid5_rs RAID5 right symmetric + - rotating parity N with data restart + raid6_zr RAID6 zero restart + - rotating parity zero (left-to-right) with data restart + raid6_nr RAID6 N restart + - rotating parity N (right-to-left) with data restart + raid6_nc RAID6 N continue + - rotating parity N (right-to-left) with data continuation + + Refererence: Chapter 4 of + http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf + +<#raid_params>: The number of parameters that follow. + +<raid_params> consists of + Mandatory parameters: + <chunk_size>: Chunk size in sectors. This parameter is often known as + "stripe size". It is the only mandatory parameter and + is placed first. + + followed by optional parameters (in any order): + [sync|nosync] Force or prevent RAID initialization. + + [rebuild <idx>] Rebuild drive number idx (first drive is 0). + + [daemon_sleep <ms>] + Interval between runs of the bitmap daemon that + clear bits. A longer interval means less bitmap I/O but + resyncing after a failure is likely to take longer. + + [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization + [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization + [write_mostly <idx>] Drive index is write-mostly + [max_write_behind <sectors>] See '-write-behind=' (man mdadm) + [stripe_cache <sectors>] Stripe cache size (higher RAIDs only) + [region_size <sectors>] + The region_size multiplied by the number of regions is the + logical size of the array. The bitmap records the device + synchronisation state for each region. + +<#raid_devs>: The number of devices composing the array. + Each device consists of two entries. The first is the device + containing the metadata (if any); the second is the one containing the + data. + + If a drive has failed or is missing at creation time, a '-' can be + given for both the metadata and data drives for a given position. + + +Example tables +-------------- +# RAID4 - 4 data drives, 1 parity (no metadata devices) # No metadata devices specified to hold superblock/bitmap info # Chunk size of 1MiB # (Lines separated for easy reading) + 0 1960893648 raid \ raid4 1 2048 \ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 -# RAID4 - 4 data drives, 1 parity (no metadata devices) +# RAID4 - 4 data drives, 1 parity (with metadata devices) # Chunk size of 1MiB, force RAID initialization, # min recovery rate at 20 kiB/sec/disk + 0 1960893648 raid \ - raid4 4 2048 min_recovery_rate 20 sync\ - 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81 + raid4 4 2048 sync min_recovery_rate 20 \ + 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 -Performing a 'dmsetup table' should display the CTR table used to -construct the mapping (with possible reordering of optional -parameters). +'dmsetup table' displays the table used to construct the mapping. +The optional parameters are always printed in the order listed +above with "sync" or "nosync" always output ahead of the other +arguments, regardless of the order used when originally loading the table. +Arguments that can be repeated are ordered by value. -Performing a 'dmsetup status' will yield information on the state and -health of the array. The output is as follows: +'dmsetup status' yields information on the state and health of the +array. +The output is as follows: 1: <s> <l> raid \ 2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio> -Line 1 is standard DM output. Line 2 is best shown by example: +Line 1 is the standard output produced by device-mapper. +Line 2 is produced by the raid target, and best explained by example: 0 1960893648 raid raid4 5 AAAAA 2/490221568 Here we can see the RAID type is raid4, there are 5 devices - all of which are 'A'live, and the array is 2/490221568 complete with recovery. +Faulty or missing devices are marked 'D'. Devices that are out-of-sync +are marked 'a'. diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 7be15e44d481..82a5d250d75e 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -143,8 +143,7 @@ o provide a way to configure fault attributes failslab, fail_page_alloc, and fail_make_request use this way. Helper functions: - init_fault_attr_dentries(entries, attr, name); - void cleanup_fault_attr_dentries(entries); + fault_create_debugfs_attr(name, parent, attr); - module parameters diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ea0bace0124a..43f48098220d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -296,15 +296,6 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org> --------------------------- -What: CONFIG_THERMAL_HWMON -When: January 2009 -Why: This option was introduced just to allow older lm-sensors userspace - to keep working over the upgrade to 2.6.26. At the scheduled time of - removal fixed lm-sensors (2.x or 3.x) should be readily available. -Who: Rene Herman <rene.herman@gmail.com> - ---------------------------- - What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS (in net/core/net-sysfs.c) When: After the only user (hal) has seen a release with the patches diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt index ace200b7c214..37c4d84a0e57 100644 --- a/Documentation/frv/booting.txt +++ b/Documentation/frv/booting.txt @@ -106,13 +106,20 @@ separated by spaces: To use the first on-chip serial port at baud rate 115200, no parity, 8 bits, and no flow control. - (*) root=/dev/<xxxx> + (*) root=<xxxx> - This specifies the device upon which the root filesystem resides. For - example: + This specifies the device upon which the root filesystem resides. It + may be specified by major and minor number, device path, or even + partition uuid, if supported. For example: /dev/nfs NFS root filesystem /dev/mtdblock3 Fourth RedBoot partition on the System Flash + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1 + first partition after the partition with the given UUID + 253:0 Device with major 253 and minor 0 + + Authoritative information can be found in + "Documentation/kernel-parameters.txt". (*) rw diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 72ba8d51dbc1..845a191004b1 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments <mailto:buk@buks.ipn.de> 0xA0 all linux/sdp/sdp.h Industrial Device Project <mailto:kenji@bitgate.com> +0xA2 00-0F arch/tile/include/asm/hardwall.h 0xA3 80-8F Port ACL in development: <mailto:tlewis@mindspring.com> 0xA3 90-9F linux/dtlk.h diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 26a83743af19..865e39f1850c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/power/pm.txt, pci=noacpi + acpi_rsdp= [ACPI,EFI,KEXEC] + Pass the RSDP address to the kernel, mostly used + on machines running EFI runtime service to boot the + second kernel for kdump. + acpi_apic_instance= [ACPI, IOAPIC] Format: <int> 2: use 2nd APIC table, if available @@ -2240,6 +2245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ro [KNL] Mount root device read-only on boot root= [KNL] Root filesystem + See name_to_dev_t comment in init/do_mounts.c. rootdelay= [KNL] Delay (in seconds) to pause before attempting to mount the root filesystem diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt index c93bed66e25d..97d45f276fe6 100644 --- a/Documentation/m68k/kernel-options.txt +++ b/Documentation/m68k/kernel-options.txt @@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for the first of these. You can find out all valid major numbers by looking into include/linux/major.h. +In addition to major and minor numbers, if the device containing your +root partition uses a partition table format with unique partition +identifiers, then you may use them. For instance, +"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also +possible to reference another partition on the same device using a +known partition UUID as the starting point. For example, +if partition 5 of the device has the UUID of +00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as +follows: + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2 + +Authoritative information can be found in +"Documentation/kernel-parameters.txt". + 2.2) ro, rw ----------- diff --git a/MAINTAINERS b/MAINTAINERS index c9c6324a7a9f..6c84a219c833 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4722,6 +4722,7 @@ S: Maintained F: drivers/of F: include/linux/of*.h K: of_get_property +K: of_match_table OPENRISC ARCHITECTURE M: Jonas Bonn <jonas@southpole.se> @@ -6318,6 +6319,7 @@ F: include/linux/sysv_fs.h TARGET SUBSYSTEM M: Nicholas A. Bellinger <nab@linux-iscsi.org> L: linux-scsi@vger.kernel.org +L: target-devel@vger.kernel.org L: http://groups.google.com/group/linux-iscsi-target-dev W: http://www.linux-iscsi.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 850265373611..466af40c5822 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file); static int sync_serial_release(struct inode *inode, struct file *file); static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); -static int sync_serial_ioctl(struct file *file, +static long sync_serial_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static ssize_t sync_serial_write(struct file *file, const char *buf, size_t count, loff_t *ppos); @@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file) *R_IRQ_MASK1_SET = 1 << port->data_avail_bit; DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev)); } - ret = 0; + err = 0; out: mutex_unlock(&sync_serial_mutex); - return ret; + return err; } static int sync_serial_release(struct inode *inode, struct file *file) diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c index 907cfb5a873d..ba0e5965d6e3 100644 --- a/arch/cris/arch-v10/kernel/irq.c +++ b/arch/cris/arch-v10/kernel/irq.c @@ -20,6 +20,9 @@ #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr)); #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr)); +extern void kgdb_init(void); +extern void breakpoint(void); + /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is * global just so that the kernel gdb can use it. */ diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 29b74a105830..332f19c54557 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -11,8 +11,6 @@ #ifdef __KERNEL__ -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR - #ifndef __ASSEMBLY__ #include <asm/types.h> #include <asm/processor.h> @@ -67,8 +65,10 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR /* thread information allocation */ -#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) +#define alloc_thread_info_node(tsk, node) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1)) #define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 849ab2fa1f5c..aec60dc06007 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm header-y += ucontext.h header-y += hardwall.h + +generic-y += bug.h +generic-y += bugs.h +generic-y += cputime.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipc.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += local.h +generic-y += module.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += parport.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += socket.h +generic-y += sockios.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += types.h +generic-y += ucontext.h +generic-y += xor.h diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h deleted file mode 100644 index b12fd89e42e9..000000000000 --- a/arch/tile/include/asm/bug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bug.h> diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h deleted file mode 100644 index 61791e1ad9f5..000000000000 --- a/arch/tile/include/asm/bugs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bugs.h> diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/tile/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/cputime.h> diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h deleted file mode 100644 index f0a4c256403b..000000000000 --- a/arch/tile/include/asm/device.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/device.h> diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/tile/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h deleted file mode 100644 index 3711bd9d50bd..000000000000 --- a/arch/tile/include/asm/emergency-restart.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/emergency-restart.h> diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/tile/include/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h deleted file mode 100644 index 3a4988e8df45..000000000000 --- a/arch/tile/include/asm/fb.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fb.h> diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/tile/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index 51537ff9265a..c66f7933beaa 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx, #define set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - #define clear_fixmap(idx) \ __set_fixmap(idx, 0, __pgprot(0)) diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/tile/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/tile/include/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h deleted file mode 100644 index a46e3d9c2a3f..000000000000 --- a/arch/tile/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/tile/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/tile/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h deleted file mode 100644 index 6ece1b037665..000000000000 --- a/arch/tile/include/asm/kdebug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/kdebug.h> diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/tile/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h deleted file mode 100644 index 1e4b79fe8584..000000000000 --- a/arch/tile/include/asm/module.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/module.h> diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h deleted file mode 100644 index 809134c644a6..000000000000 --- a/arch/tile/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/msgbuf.h> diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/tile/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mutex-dec.h> diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 --- a/arch/tile/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h deleted file mode 100644 index cf252af64590..000000000000 --- a/arch/tile/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/parport.h> diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/tile/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h deleted file mode 100644 index 22cae6230ceb..000000000000 --- a/arch/tile/include/asm/posix_types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/posix_types.h> diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h deleted file mode 100644 index 04bc4db8921b..000000000000 --- a/arch/tile/include/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h deleted file mode 100644 index 35d786fe93ae..000000000000 --- a/arch/tile/include/asm/scatterlist.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/scatterlist.h> diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h deleted file mode 100644 index 7673b83cfef7..000000000000 --- a/arch/tile/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sembuf.h> diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h deleted file mode 100644 index a0cb0caff152..000000000000 --- a/arch/tile/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/serial.h> diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc2de38..000000000000 --- a/arch/tile/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmbuf.h> diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h deleted file mode 100644 index 93f30deb95d0..000000000000 --- a/arch/tile/include/asm/shmparam.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmparam.h> diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/tile/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/tile/include/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h deleted file mode 100644 index 0b91fe198c20..000000000000 --- a/arch/tile/include/asm/statfs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/statfs.h> diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h deleted file mode 100644 index 3935b106de79..000000000000 --- a/arch/tile/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/tile/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h deleted file mode 100644 index b9e79bc580dd..000000000000 --- a/arch/tile/include/asm/types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/types.h> diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30fb..000000000000 --- a/arch/tile/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/arch/tile/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 000000000000..6395faa6d9e6 --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c4be58cc5d50..f6f50f2a5e37 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -91,8 +90,6 @@ void __init setup_clock(void) cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); sched_clock_mult = clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); - cycle_counter_cs.mult = - clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -107,7 +104,7 @@ void __init calibrate_delay(void) void __init time_init(void) { /* Initialize and register the clock source. */ - clocksource_register(&cycle_counter_cs); + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); /* Start up the tile-timer interrupt source on the boot cpu. */ setup_tile_timer(); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e10c4023028..7309988c9794 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -836,8 +836,7 @@ void __init mem_init(void) #endif #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); #endif #ifdef CONFIG_HIGHMEM diff --git a/block/blk-core.c b/block/blk-core.c index b850bedad229..b627558c461f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes) static int __init fail_make_request_debugfs(void) { - return init_fault_attr_dentries(&fail_make_request, - "fail_make_request"); + struct dentry *dir = fault_create_debugfs_attr("fail_make_request", + NULL, &fail_make_request); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_make_request_debugfs); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4f0c06c7a338..780354888958 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q) static int __init fail_io_timeout_debugfs(void) { - return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); + struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", + NULL, &fail_io_timeout); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_io_timeout_debugfs); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 73863d86f022..76dc02f15574 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE); */ u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); +/* + * Disable runtime checking and repair of values returned by control methods. + * Use only if the repair is causing a problem on a particular machine. + */ +u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE); + /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ struct acpi_table_fadt acpi_gbl_FADT; diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index c7f743ca395b..5552125d8340 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -357,6 +357,7 @@ struct acpi_predefined_data { char *pathname; const union acpi_predefined_info *predefined; union acpi_operand_object *parent_package; + struct acpi_namespace_node *node; u32 flags; u8 node_flags; }; diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 94e73c97cf85..c445cca490ea 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] = {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, {{"_TC1", 0, ACPI_RTYPE_INTEGER}}, {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, + {{"_TDL", 0, ACPI_RTYPE_INTEGER}}, {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index 9fb03fa8ffde..c845c8089f39 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, } /* - * 1) We have a return value, but if one wasn't expected, just exit, this is - * not a problem. For example, if the "Implicit Return" feature is - * enabled, methods will always return a value. + * Return value validation and possible repair. * - * 2) If the return value can be of any type, then we cannot perform any - * validation, exit. + * 1) Don't perform return value validation/repair if this feature + * has been disabled via a global option. + * + * 2) We have a return value, but if one wasn't expected, just exit, + * this is not a problem. For example, if the "Implicit Return" + * feature is enabled, methods will always return a value. + * + * 3) If the return value can be of any type, then we cannot perform + * any validation, just exit. */ - if ((!predefined->info.expected_btypes) || + if (acpi_gbl_disable_auto_repair || + (!predefined->info.expected_btypes) || (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) { goto cleanup; } @@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, goto cleanup; } data->predefined = predefined; + data->node = node; data->node_flags = node->flags; data->pathname = pathname; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 973883babee1..024c4f263f87 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data, { union acpi_operand_object *return_object = *return_object_ptr; acpi_status status; + struct acpi_namespace_node *node; + + /* + * We can only sort the _TSS return package if there is no _PSS in the + * same scope. This is because if _PSS is present, the ACPI specification + * dictates that the _TSS Power Dissipation field is to be ignored, and + * therefore some BIOSs leave garbage values in the _TSS Power field(s). + * In this case, it is best to just return the _TSS package as-is. + * (May, 2011) + */ + status = + acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node); + if (ACPI_SUCCESS(status)) { + return (AE_OK); + } status = acpi_ns_check_sorted_list(data, return_object, 5, 1, ACPI_SORT_DESCENDING, diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 48db0944ce4a..62365f6075dd 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) } /* - * Originally, we checked the table signature for "SSDT" or "PSDT" here. - * Next, we added support for OEMx tables, signature "OEM". - * Valid tables were encountered with a null signature, so we've just - * given up on validating the signature, since it seems to be a waste - * of code. The original code was removed (05/2008). + * Validate the incoming table signature. + * + * 1) Originally, we checked the table signature for "SSDT" or "PSDT". + * 2) We added support for OEMx tables, signature "OEM". + * 3) Valid tables were encountered with a null signature, so we just + * gave up on validating the signature, (05/2008). + * 4) We encountered non-AML tables such as the MADT, which caused + * interpreter errors and kernel faults. So now, we once again allow + * only "SSDT", "OEMx", and now, also a null signature. (05/2011). */ + if ((table_desc->pointer->signature[0] != 0x00) && + (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT)) + && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) { + ACPI_ERROR((AE_INFO, + "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx", + acpi_ut_valid_acpi_name(*(u32 *)table_desc-> + pointer-> + signature) ? table_desc-> + pointer->signature : "????", + *(u32 *)table_desc->pointer->signature)); + + return_ACPI_STATUS(AE_BAD_SIGNATURE); + } (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 2c661353e8f2..87c0a8daa99a 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -55,6 +55,9 @@ #define ACPI_BATTERY_NOTIFY_INFO 0x81 #define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 +/* Battery power unit: 0 means mW, 1 means mA */ +#define ACPI_BATTERY_POWER_UNIT_MA 1 + #define _COMPONENT ACPI_BATTERY_COMPONENT ACPI_MODULE_NAME("battery"); @@ -91,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids); enum { ACPI_BATTERY_ALARM_PRESENT, ACPI_BATTERY_XINFO_PRESENT, - /* For buggy DSDTs that report negative 16-bit values for either - * charging or discharging current and/or report 0 as 65536 - * due to bad math. - */ - ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, }; @@ -301,7 +299,8 @@ static enum power_supply_property energy_battery_props[] = { #ifdef CONFIG_ACPI_PROCFS_POWER inline char *acpi_battery_units(struct acpi_battery *battery) { - return (battery->power_unit)?"mA":"mW"; + return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ? + "mA" : "mW"; } #endif @@ -461,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery) battery->update_time = jiffies; kfree(buffer.pointer); - if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && - battery->rate_now != -1) + /* For buggy DSDTs that report negative 16-bit values for either + * charging or discharging current and/or report 0 as 65536 + * due to bad math. + */ + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA && + battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN && + (s16)(battery->rate_now) < 0) { battery->rate_now = abs((s16)battery->rate_now); + printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate" + " invalid.\n"); + } if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) && battery->capacity_now >= 0 && battery->capacity_now <= 100) @@ -544,7 +551,7 @@ static int sysfs_add_battery(struct acpi_battery *battery) { int result; - if (battery->power_unit) { + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { battery->bat.properties = charge_battery_props; battery->bat.num_properties = ARRAY_SIZE(charge_battery_props); @@ -566,18 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery) static void sysfs_remove_battery(struct acpi_battery *battery) { - if (!battery->bat.dev) + mutex_lock(&battery->lock); + if (!battery->bat.dev) { + mutex_unlock(&battery->lock); return; + } + device_remove_file(battery->bat.dev, &alarm_attr); power_supply_unregister(&battery->bat); battery->bat.dev = NULL; -} - -static void acpi_battery_quirks(struct acpi_battery *battery) -{ - if (dmi_name_in_vendors("Acer") && battery->power_unit) { - set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags); - } + mutex_unlock(&battery->lock); } /* @@ -592,7 +597,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery) * * Handle this correctly so that they won't break userspace. */ -static void acpi_battery_quirks2(struct acpi_battery *battery) +static void acpi_battery_quirks(struct acpi_battery *battery) { if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) return ; @@ -623,13 +628,15 @@ static int acpi_battery_update(struct acpi_battery *battery) result = acpi_battery_get_info(battery); if (result) return result; - acpi_battery_quirks(battery); acpi_battery_init_alarm(battery); } - if (!battery->bat.dev) - sysfs_add_battery(battery); + if (!battery->bat.dev) { + result = sysfs_add_battery(battery); + if (result) + return result; + } result = acpi_battery_get_state(battery); - acpi_battery_quirks2(battery); + acpi_battery_quirks(battery); return result; } @@ -863,7 +870,7 @@ DECLARE_FILE_FUNCTIONS(alarm); }, \ } -static struct battery_file { +static const struct battery_file { struct file_operations ops; mode_t mode; const char *name; @@ -948,9 +955,12 @@ static int battery_notify(struct notifier_block *nb, struct acpi_battery *battery = container_of(nb, struct acpi_battery, pm_nb); switch (mode) { + case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - sysfs_remove_battery(battery); - sysfs_add_battery(battery); + if (battery->bat.dev) { + sysfs_remove_battery(battery); + sysfs_add_battery(battery); + } break; } @@ -975,25 +985,33 @@ static int acpi_battery_add(struct acpi_device *device) if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, "_BIX", &handle))) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); - acpi_battery_update(battery); + result = acpi_battery_update(battery); + if (result) + goto fail; #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif - if (!result) { - printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", - ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), - device->status.battery_present ? "present" : "absent"); - } else { + if (result) { #ifdef CONFIG_ACPI_PROCFS_POWER acpi_battery_remove_fs(device); #endif - kfree(battery); + goto fail; } + printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", + ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), + device->status.battery_present ? "present" : "absent"); + battery->pm_nb.notifier_call = battery_notify; register_pm_notifier(&battery->pm_nb); return result; + +fail: + sysfs_remove_battery(battery); + mutex_destroy(&battery->lock); + kfree(battery); + return result; } static int acpi_battery_remove(struct acpi_device *device, int type) diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 1864ad3cf895..19a61136d848 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -77,7 +77,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - struct acpi_dock_ops *ops; + const struct acpi_dock_ops *ops; void *context; }; @@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. */ int -register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, +register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c index 05b44201a614..22f918bacd35 100644 --- a/drivers/acpi/ec_sys.c +++ b/drivers/acpi/ec_sys.c @@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf, return count; } -static struct file_operations acpi_ec_io_ops = { +static const struct file_operations acpi_ec_io_ops = { .owner = THIS_MODULE, .open = acpi_ec_open_io, .read = acpi_ec_read_io, diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 467479f07c1f..0f0356ca1a9e 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) return result; } -static struct thermal_cooling_device_ops fan_cooling_ops = { +static const struct thermal_cooling_device_ops fan_cooling_ops = { .get_max_state = fan_get_max_state, .get_cur_state = fan_get_cur_state, .set_cur_state = fan_set_cur_state, diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 372f9b70f7f4..fa32f584229f 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported) { if (!strcmp("Linux", interface)) { - printk(KERN_NOTICE FW_BUG PREFIX + printk_once(KERN_NOTICE FW_BUG PREFIX "BIOS _OSI(Linux) query %s%s\n", osi_linux.enable ? "honored" : "ignored", osi_linux.cmdline ? " via cmdline" : @@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args) #endif } +#ifdef CONFIG_KEXEC +static unsigned long acpi_rsdp; +static int __init setup_acpi_rsdp(char *arg) +{ + acpi_rsdp = simple_strtoul(arg, NULL, 16); + return 0; +} +early_param("acpi_rsdp", setup_acpi_rsdp); +#endif + acpi_physical_address __init acpi_os_get_root_pointer(void) { +#ifdef CONFIG_KEXEC + if (acpi_rsdp) + return acpi_rsdp; +#endif + if (efi_enabled) { if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) return efi.acpi20; @@ -1083,7 +1098,13 @@ struct osi_setup_entry { bool enable; }; -static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; +static struct osi_setup_entry __initdata + osi_setup_entries[OSI_STRING_ENTRIES_MAX] = { + {"Module Device", true}, + {"Processor Device", true}, + {"3.0 _SCP Extensions", true}, + {"Processor Aggregator Device", true}, +}; void __init acpi_osi_setup(char *str) { diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index f907cfbfa13c..7f9eba9a0b02 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus) /* -------------------------------------------------------------------------- PCI Interrupt Routing Support -------------------------------------------------------------------------- */ +#ifdef CONFIG_X86_IO_APIC +extern int noioapicquirk; +extern int noioapicreroute; + +static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) +{ + struct pci_bus *bus_it; + + for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { + if (!bus_it->self) + return 0; + if (bus_it->self->irq_reroute_variant) + return bus_it->self->irq_reroute_variant; + } + return 0; +} + +/* + * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ + * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does + * during interrupt handling). When this INTx generation cannot be disabled, + * we reroute these interrupts to their legacy equivalent to get rid of + * spurious interrupts. + */ +static int acpi_reroute_boot_interrupt(struct pci_dev *dev, + struct acpi_prt_entry *entry) +{ + if (noioapicquirk || noioapicreroute) { + return 0; + } else { + switch (bridge_has_boot_interrupt_variant(dev->bus)) { + case 0: + /* no rerouting necessary */ + return 0; + case INTEL_IRQ_REROUTE_VARIANT: + /* + * Remap according to INTx routing table in 6700PXH + * specs, intel order number 302628-002, section + * 2.15.2. Other chipsets (80332, ...) have the same + * mapping and are handled here as well. + */ + dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy " + "IRQ %d\n", entry->index, + (entry->index % 4) + 16); + entry->index = (entry->index % 4) + 16; + return 1; + default: + dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy " + "IRQ: unknown mapping\n", entry->index); + return -1; + } + } +} +#endif /* CONFIG_X86_IO_APIC */ + static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry; @@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) entry = acpi_pci_irq_find_prt_entry(dev, pin); if (entry) { +#ifdef CONFIG_X86_IO_APIC + acpi_reroute_boot_interrupt(dev, entry); +#endif /* CONFIG_X86_IO_APIC */ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", pci_name(dev), pin_name(pin))); return entry; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d06078d660ad..2672c798272f 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) root->secondary.end = 0xFF; printk(KERN_WARNING FW_BUG PREFIX "no secondary bus range in _CRS\n"); - status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); + status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, + NULL, &bus); if (ACPI_SUCCESS(status)) root->secondary.start = bus; else if (status == AE_NOT_FOUND) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 79cb65332894..870550d6a4bf 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, return result; } -struct thermal_cooling_device_ops processor_cooling_ops = { +const struct thermal_cooling_device_ops processor_cooling_ops = { .get_max_state = processor_get_max_state, .get_cur_state = processor_get_cur_state, .set_cur_state = processor_set_cur_state, diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 50658ff887d9..6e36d0c0057c 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -130,6 +130,9 @@ struct acpi_sbs { #define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) +static int acpi_sbs_remove(struct acpi_device *device, int type); +static int acpi_battery_get_state(struct acpi_battery *battery); + static inline int battery_scale(int log) { int scale = 1; @@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) return -ENODEV; + + acpi_battery_get_state(battery); switch (psp) { case POWER_SUPPLY_PROP_STATUS: if (battery->rate_now < 0) @@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_POWER_NOW: val->intval = abs(battery->rate_now) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_POWER_AVG: val->intval = abs(battery->rate_avg) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery->state_of_charge; @@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context) } } -static int acpi_sbs_remove(struct acpi_device *device, int type); - static int acpi_sbs_add(struct acpi_device *device) { struct acpi_sbs *sbs; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6c949602cbd1..3ed80b2ca907 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), }, }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI DELUXE", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"), + }, + }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI Premium", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 77255f250dbb..c538d0ef10ff 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp) return result; } -static struct kernel_param_ops param_ops_debug_layer = { +static const struct kernel_param_ops param_ops_debug_layer = { .set = param_set_uint, .get = param_get_debug_layer, }; -static struct kernel_param_ops param_ops_debug_level = { +static const struct kernel_param_ops param_ops_debug_level = { .set = param_set_uint, .get = param_get_debug_level, }; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 2607e17b520f..48fbc647b178 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal, thermal_zone_unbind_cooling_device); } -static struct thermal_zone_device_ops acpi_thermal_zone_ops = { +static const struct thermal_zone_device_ops acpi_thermal_zone_ops = { .bind = acpi_thermal_bind_cooling_device, .unbind = acpi_thermal_unbind_cooling_device, .get_temp = thermal_get_temp, diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index ada4b4d9bdc8..08a44b532f7c 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st return acpi_video_device_lcd_set_level(video, level); } -static struct thermal_cooling_device_ops video_cooling_ops = { +static const struct thermal_cooling_device_ops video_cooling_ops = { .get_max_state = video_get_max_state, .get_cur_state = video_get_cur_state, .set_cur_state = video_set_cur_state, diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index e0a5b555cee1..bb7c5f1085cc 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) ata_acpi_uevent(dev->link->ap, dev, event); } -static struct acpi_dock_ops ata_acpi_dev_dock_ops = { +static const struct acpi_dock_ops ata_acpi_dev_dock_ops = { .handler = ata_acpi_dev_notify_dock, .uevent = ata_acpi_dev_uevent, }; -static struct acpi_dock_ops ata_acpi_ap_dock_ops = { +static const struct acpi_dock_ops ata_acpi_ap_dock_ops = { .handler = ata_acpi_ap_notify_dock, .uevent = ata_acpi_ap_uevent, }; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 49502bc5360a..423fd56bf612 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -616,5 +616,16 @@ config MSM_SMD_PKT Enables userspace clients to read and write to some packet SMD ports via device interface for MSM chipset. +config TILE_SROM + bool "Character-device access via hypervisor to the Tilera SPI ROM" + depends on TILE + default y + ---help--- + This device provides character-level read-write access + to the SROM, typically via the "0", "1", and "2" devices + in /dev/srom/. The Tilera hypervisor makes the flash + device appear much like a simple EEPROM, and knows + how to partition a single ROM for multiple purposes. + endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 7a00672bd85d..32762ba769c2 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o + +obj-$(CONFIG_TILE_SROM) += tile-srom.o diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index fca0c51bbc90..810aff9e750f 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; + /* + * Update the module parameter variables as well so they are visible + * through /sys/module/ramoops/parameters/ + */ + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + record_size = pdata->record_size; + dump_oops = pdata->dump_oops; if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { pr_err("request mem region failed\n"); diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c new file mode 100644 index 000000000000..cf3ee008dca2 --- /dev/null +++ b/drivers/char/tile-srom.c @@ -0,0 +1,481 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * SPI Flash ROM driver + * + * This source code is derived from code provided in "Linux Device + * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and + * Greg Kroah-Hartman, published by O'Reilly Media, Inc. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/kernel.h> /* printk() */ +#include <linux/slab.h> /* kmalloc() */ +#include <linux/fs.h> /* everything... */ +#include <linux/errno.h> /* error codes */ +#include <linux/types.h> /* size_t */ +#include <linux/proc_fs.h> +#include <linux/fcntl.h> /* O_ACCMODE */ +#include <linux/aio.h> +#include <linux/pagemap.h> +#include <linux/hugetlb.h> +#include <linux/uaccess.h> +#include <linux/platform_device.h> +#include <hv/hypervisor.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/delay.h> +#include <hv/drv_srom_intf.h> + +/* + * Size of our hypervisor I/O requests. We break up large transfers + * so that we don't spend large uninterrupted spans of time in the + * hypervisor. Erasing an SROM sector takes a significant fraction of + * a second, so if we allowed the user to, say, do one I/O to write the + * entire ROM, we'd get soft lockup timeouts, or worse. + */ +#define SROM_CHUNK_SIZE ((size_t)4096) + +/* + * When hypervisor is busy (e.g. erasing), poll the status periodically. + */ + +/* + * Interval to poll the state in msec + */ +#define SROM_WAIT_TRY_INTERVAL 20 + +/* + * Maximum times to poll the state + */ +#define SROM_MAX_WAIT_TRY_TIMES 1000 + +struct srom_dev { + int hv_devhdl; /* Handle for hypervisor device */ + u32 total_size; /* Size of this device */ + u32 sector_size; /* Size of a sector */ + u32 page_size; /* Size of a page */ + struct mutex lock; /* Allow only one accessor at a time */ +}; + +static int srom_major; /* Dynamic major by default */ +module_param(srom_major, int, 0); +MODULE_AUTHOR("Tilera Corporation"); +MODULE_LICENSE("GPL"); + +static int srom_devs; /* Number of SROM partitions */ +static struct cdev srom_cdev; +static struct class *srom_class; +static struct srom_dev *srom_devices; + +/* + * Handle calling the hypervisor and managing EAGAIN/EBUSY. + */ + +static ssize_t _srom_read(int hv_devhdl, void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_read: error %d\n", retval); + return -EIO; + } +} + +static ssize_t _srom_write(int hv_devhdl, const void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_write: error %d\n", retval); + return -EIO; + } +} + +/** + * srom_open() - Device open routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. + */ +static int srom_open(struct inode *inode, struct file *filp) +{ + filp->private_data = &srom_devices[iminor(inode)]; + return 0; +} + + +/** + * srom_release() - Device release routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. + */ +static int srom_release(struct inode *inode, struct file *filp) +{ + struct srom_dev *srom = filp->private_data; + char dummy; + + /* Make sure we've flushed anything written to the ROM. */ + mutex_lock(&srom->lock); + if (srom->hv_devhdl >= 0) + _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1); + mutex_unlock(&srom->lock); + + filp->private_data = NULL; + + return 0; +} + + +/** + * srom_read() - Read data from the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes read, or an error code. + */ +static ssize_t srom_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + hv_retval = _srom_read(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval > 0) { + if (copy_to_user(buf, kernbuf, hv_retval) != 0) { + retval = -EFAULT; + break; + } + } else if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/** + * srom_write() - Write data to the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes written, or an error code. + */ +static ssize_t srom_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) { + retval = -EFAULT; + break; + } + + hv_retval = _srom_write(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/* Provide our own implementation so we can use srom->total_size. */ +loff_t srom_llseek(struct file *filp, loff_t offset, int origin) +{ + struct srom_dev *srom = filp->private_data; + + if (mutex_lock_interruptible(&srom->lock)) + return -ERESTARTSYS; + + switch (origin) { + case SEEK_END: + offset += srom->total_size; + break; + case SEEK_CUR: + offset += filp->f_pos; + break; + } + + if (offset < 0 || offset > srom->total_size) { + offset = -EINVAL; + } else { + filp->f_pos = offset; + filp->f_version = 0; + } + + mutex_unlock(&srom->lock); + + return offset; +} + +static ssize_t total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->total_size); +} + +static ssize_t sector_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->sector_size); +} + +static ssize_t page_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->page_size); +} + +static struct device_attribute srom_dev_attrs[] = { + __ATTR(total_size, S_IRUGO, total_show, NULL), + __ATTR(sector_size, S_IRUGO, sector_show, NULL), + __ATTR(page_size, S_IRUGO, page_show, NULL), + __ATTR_NULL +}; + +static char *srom_devnode(struct device *dev, mode_t *mode) +{ + *mode = S_IRUGO | S_IWUSR; + return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev)); +} + +/* + * The fops + */ +static const struct file_operations srom_fops = { + .owner = THIS_MODULE, + .llseek = srom_llseek, + .read = srom_read, + .write = srom_write, + .open = srom_open, + .release = srom_release, +}; + +/** + * srom_setup_minor() - Initialize per-minor information. + * @srom: Per-device SROM state. + * @index: Device to set up. + */ +static int srom_setup_minor(struct srom_dev *srom, int index) +{ + struct device *dev; + int devhdl = srom->hv_devhdl; + + mutex_init(&srom->lock); + + if (_srom_read(devhdl, &srom->total_size, + SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->sector_size, + SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->page_size, + SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0) + return -EIO; + + dev = device_create(srom_class, &platform_bus, + MKDEV(srom_major, index), srom, "%d", index); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +/** srom_init() - Initialize the driver's module. */ +static int srom_init(void) +{ + int result, i; + dev_t dev = MKDEV(srom_major, 0); + + /* + * Start with a plausible number of partitions; the krealloc() call + * below will yield about log(srom_devs) additional allocations. + */ + srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL); + + /* Discover the number of srom partitions. */ + for (i = 0; ; i++) { + int devhdl; + char buf[20]; + struct srom_dev *new_srom_devices = + krealloc(srom_devices, (i+1) * sizeof(struct srom_dev), + GFP_KERNEL | __GFP_ZERO); + if (!new_srom_devices) { + result = -ENOMEM; + goto fail_mem; + } + srom_devices = new_srom_devices; + sprintf(buf, "srom/0/%d", i); + devhdl = hv_dev_open((HV_VirtAddr)buf, 0); + if (devhdl < 0) { + if (devhdl != HV_ENODEV) + pr_notice("srom/%d: hv_dev_open failed: %d.\n", + i, devhdl); + break; + } + srom_devices[i].hv_devhdl = devhdl; + } + srom_devs = i; + + /* Bail out early if we have no partitions at all. */ + if (srom_devs == 0) { + result = -ENODEV; + goto fail_mem; + } + + /* Register our major, and accept a dynamic number. */ + if (srom_major) + result = register_chrdev_region(dev, srom_devs, "srom"); + else { + result = alloc_chrdev_region(&dev, 0, srom_devs, "srom"); + srom_major = MAJOR(dev); + } + if (result < 0) + goto fail_mem; + + /* Register a character device. */ + cdev_init(&srom_cdev, &srom_fops); + srom_cdev.owner = THIS_MODULE; + srom_cdev.ops = &srom_fops; + result = cdev_add(&srom_cdev, dev, srom_devs); + if (result < 0) + goto fail_chrdev; + + /* Create a sysfs class. */ + srom_class = class_create(THIS_MODULE, "srom"); + if (IS_ERR(srom_class)) { + result = PTR_ERR(srom_class); + goto fail_cdev; + } + srom_class->dev_attrs = srom_dev_attrs; + srom_class->devnode = srom_devnode; + + /* Do per-partition initialization */ + for (i = 0; i < srom_devs; i++) { + result = srom_setup_minor(srom_devices + i, i); + if (result < 0) + goto fail_class; + } + + return 0; + +fail_class: + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); +fail_cdev: + cdev_del(&srom_cdev); +fail_chrdev: + unregister_chrdev_region(dev, srom_devs); +fail_mem: + kfree(srom_devices); + return result; +} + +/** srom_cleanup() - Clean up the driver's module. */ +static void srom_cleanup(void) +{ + int i; + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); + cdev_del(&srom_cdev); + unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs); + kfree(srom_devices); +} + +module_init(srom_init); +module_exit(srom_cleanup); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 7fc2f108f490..3f4051a7c5a7 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -80,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#ifdef CONFIG_PNP +#if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev) return 0; } +#else +static inline int is_itpm(struct pnp_dev *dev) +{ + return 0; +} #endif static int check_locality(struct tpm_chip *chip, int l) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index eacb05e6cfb3..eb80b549ed8d 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength) return length; } -static unsigned long +static inline unsigned long utf16_strlen(efi_char16_t *s) { return utf16_strnlen(s, ~0UL); @@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return -1; } -static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) +static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) { return 0; } diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c index 80baa53da5b1..d64c5a43aaad 100644 --- a/drivers/input/serio/xilinx_ps2.c +++ b/drivers/input/serio/xilinx_ps2.c @@ -23,7 +23,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/io.h> - +#include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_platform.h> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 8420129fc5ee..f75a66e7d312 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -241,12 +241,13 @@ config DM_MIRROR needed for live data migration tools such as 'pvmove'. config DM_RAID - tristate "RAID 4/5/6 target (EXPERIMENTAL)" + tristate "RAID 1/4/5/6 target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL + select MD_RAID1 select MD_RAID456 select BLK_DEV_MD ---help--- - A dm target that supports RAID4, RAID5 and RAID6 mappings + A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bae6c4e23d3f..49da55c1528a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -30,7 +30,6 @@ #include <linux/device-mapper.h> #define DM_MSG_PREFIX "crypt" -#define MESG_STR(x) x, sizeof(x) /* * context holding the current state of a multi-part conversion @@ -239,7 +238,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); + *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); return 0; } @@ -248,7 +247,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { memset(iv, 0, cc->iv_size); - *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); return 0; } @@ -415,7 +414,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; memset(iv, 0, cc->iv_size); - *(u64 *)iv = cpu_to_le64(dmreq->iv_sector); + *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); crypto_cipher_encrypt_one(essiv_tfm, iv, iv); return 0; @@ -1575,11 +1574,17 @@ bad_mem: static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; - unsigned int key_size; + unsigned int key_size, opt_params; unsigned long long tmpll; int ret; + struct dm_arg_set as; + const char *opt_string; + + static struct dm_arg _args[] = { + {0, 1, "Invalid number of feature args"}, + }; - if (argc != 5) { + if (argc < 5) { ti->error = "Not enough arguments"; return -EINVAL; } @@ -1648,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; + argv += 5; + argc -= 5; + + /* Optional parameters */ + if (argc) { + as.argc = argc; + as.argv = argv; + + ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (ret) + goto bad; + + opt_string = dm_shift_arg(&as); + + if (opt_params == 1 && opt_string && + !strcasecmp(opt_string, "allow_discards")) + ti->num_discard_requests = 1; + else if (opt_params) { + ret = -EINVAL; + ti->error = "Invalid feature arguments"; + goto bad; + } + } + ret = -ENOMEM; cc->io_queue = alloc_workqueue("kcryptd_io", WQ_NON_REENTRANT| @@ -1682,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, struct dm_crypt_io *io; struct crypt_config *cc; - if (bio->bi_rw & REQ_FLUSH) { + /* + * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. + * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight + * - for REQ_DISCARD caller must use flush if IO ordering matters + */ + if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { cc = ti->private; bio->bi_bdev = cc->dev->bdev; + if (bio_sectors(bio)) + bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } @@ -1727,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); + + if (ti->num_discard_requests) + DMEMIT(" 1 allow_discards"); + break; } return 0; @@ -1770,12 +1810,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) if (argc < 2) goto error; - if (!strnicmp(argv[0], MESG_STR("key"))) { + if (!strcasecmp(argv[0], "key")) { if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { DMWARN("not suspended during key manipulation."); return -EINVAL; } - if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) { + if (argc == 3 && !strcasecmp(argv[1], "set")) { ret = crypt_set_key(cc, argv[2]); if (ret) return ret; @@ -1783,7 +1823,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) ret = cc->iv_gen_ops->init(cc); return ret; } - if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) { + if (argc == 2 && !strcasecmp(argv[1], "wipe")) { if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { ret = cc->iv_gen_ops->wipe(cc); if (ret) @@ -1823,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ea790623c30b..89f73ca22cfa 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2003 Sistina Software (UK) Limited. - * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved. * * This file is released under the GPL. */ @@ -15,6 +15,9 @@ #define DM_MSG_PREFIX "flakey" +#define all_corrupt_bio_flags_match(bio, fc) \ + (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) + /* * Flakey: Used for testing only, simulates intermittent, * catastrophic device failure. @@ -25,60 +28,189 @@ struct flakey_c { sector_t start; unsigned up_interval; unsigned down_interval; + unsigned long flags; + unsigned corrupt_bio_byte; + unsigned corrupt_bio_rw; + unsigned corrupt_bio_value; + unsigned corrupt_bio_flags; +}; + +enum feature_flag_bits { + DROP_WRITES }; +static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, + struct dm_target *ti) +{ + int r; + unsigned argc; + const char *arg_name; + + static struct dm_arg _args[] = { + {0, 6, "Invalid number of feature args"}, + {1, UINT_MAX, "Invalid corrupt bio byte"}, + {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, + {0, UINT_MAX, "Invalid corrupt bio flags mask"}, + }; + + /* No feature arguments supplied. */ + if (!as->argc) + return 0; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return r; + + while (argc) { + arg_name = dm_shift_arg(as); + argc--; + + /* + * drop_writes + */ + if (!strcasecmp(arg_name, "drop_writes")) { + if (test_and_set_bit(DROP_WRITES, &fc->flags)) { + ti->error = "Feature drop_writes duplicated"; + return -EINVAL; + } + + continue; + } + + /* + * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags> + */ + if (!strcasecmp(arg_name, "corrupt_bio_byte")) { + if (!argc) + ti->error = "Feature corrupt_bio_byte requires parameters"; + + r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); + if (r) + return r; + argc--; + + /* + * Direction r or w? + */ + arg_name = dm_shift_arg(as); + if (!strcasecmp(arg_name, "w")) + fc->corrupt_bio_rw = WRITE; + else if (!strcasecmp(arg_name, "r")) + fc->corrupt_bio_rw = READ; + else { + ti->error = "Invalid corrupt bio direction (r or w)"; + return -EINVAL; + } + argc--; + + /* + * Value of byte (0-255) to write in place of correct one. + */ + r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error); + if (r) + return r; + argc--; + + /* + * Only corrupt bios with these flags set. + */ + r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); + if (r) + return r; + argc--; + + continue; + } + + ti->error = "Unrecognised flakey feature requested"; + return -EINVAL; + } + + if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { + ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; + return -EINVAL; + } + + return 0; +} + /* - * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval> + * Construct a flakey mapping: + * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*] + * + * Feature args: + * [drop_writes] + * [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>] + * + * Nth_byte starts from 1 for the first byte. + * Direction is r for READ or w for WRITE. + * bio_flags is ignored if 0. */ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) { + static struct dm_arg _args[] = { + {0, UINT_MAX, "Invalid up interval"}, + {0, UINT_MAX, "Invalid down interval"}, + }; + + int r; struct flakey_c *fc; - unsigned long long tmp; + unsigned long long tmpll; + struct dm_arg_set as; + const char *devname; - if (argc != 4) { - ti->error = "dm-flakey: Invalid argument count"; + as.argc = argc; + as.argv = argv; + + if (argc < 4) { + ti->error = "Invalid argument count"; return -EINVAL; } - fc = kmalloc(sizeof(*fc), GFP_KERNEL); + fc = kzalloc(sizeof(*fc), GFP_KERNEL); if (!fc) { - ti->error = "dm-flakey: Cannot allocate linear context"; + ti->error = "Cannot allocate linear context"; return -ENOMEM; } fc->start_time = jiffies; - if (sscanf(argv[1], "%llu", &tmp) != 1) { - ti->error = "dm-flakey: Invalid device sector"; + devname = dm_shift_arg(&as); + + if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) { + ti->error = "Invalid device sector"; goto bad; } - fc->start = tmp; + fc->start = tmpll; - if (sscanf(argv[2], "%u", &fc->up_interval) != 1) { - ti->error = "dm-flakey: Invalid up interval"; + r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error); + if (r) goto bad; - } - if (sscanf(argv[3], "%u", &fc->down_interval) != 1) { - ti->error = "dm-flakey: Invalid down interval"; + r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error); + if (r) goto bad; - } if (!(fc->up_interval + fc->down_interval)) { - ti->error = "dm-flakey: Total (up + down) interval is zero"; + ti->error = "Total (up + down) interval is zero"; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { - ti->error = "dm-flakey: Interval overflow"; + ti->error = "Interval overflow"; goto bad; } - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) { - ti->error = "dm-flakey: Device lookup failed"; + r = parse_features(&as, fc, ti); + if (r) + goto bad; + + if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { + ti->error = "Device lookup failed"; goto bad; } ti->num_flush_requests = 1; + ti->num_discard_requests = 1; ti->private = fc; return 0; @@ -99,7 +231,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector) { struct flakey_c *fc = ti->private; - return fc->start + (bi_sector - ti->begin); + return fc->start + dm_target_offset(ti, bi_sector); } static void flakey_map_bio(struct dm_target *ti, struct bio *bio) @@ -111,6 +243,25 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); } +static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) +{ + unsigned bio_bytes = bio_cur_bytes(bio); + char *data = bio_data(bio); + + /* + * Overwrite the Nth byte of the data returned. + */ + if (data && bio_bytes >= fc->corrupt_bio_byte) { + data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; + + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", + bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, + (bio_data_dir(bio) == WRITE) ? 'w' : 'r', + bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); + } +} + static int flakey_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { @@ -119,18 +270,71 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; - if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) + if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { + /* + * Flag this bio as submitted while down. + */ + map_context->ll = 1; + + /* + * Map reads as normal. + */ + if (bio_data_dir(bio) == READ) + goto map_bio; + + /* + * Drop writes? + */ + if (test_bit(DROP_WRITES, &fc->flags)) { + bio_endio(bio, 0); + return DM_MAPIO_SUBMITTED; + } + + /* + * Corrupt matching writes. + */ + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { + if (all_corrupt_bio_flags_match(bio, fc)) + corrupt_bio_data(bio, fc); + goto map_bio; + } + + /* + * By default, error all I/O. + */ return -EIO; + } +map_bio: flakey_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } +static int flakey_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct flakey_c *fc = ti->private; + unsigned bio_submitted_while_down = map_context->ll; + + /* + * Corrupt successful READs while in down state. + * If flags were specified, only corrupt those that match. + */ + if (!error && bio_submitted_while_down && + (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) + corrupt_bio_data(bio, fc); + + return error; +} + static int flakey_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { + unsigned sz = 0; struct flakey_c *fc = ti->private; + unsigned drop_writes; switch (type) { case STATUSTYPE_INFO: @@ -138,9 +342,22 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name, - (unsigned long long)fc->start, fc->up_interval, - fc->down_interval); + DMEMIT("%s %llu %u %u ", fc->dev->name, + (unsigned long long)fc->start, fc->up_interval, + fc->down_interval); + + drop_writes = test_bit(DROP_WRITES, &fc->flags); + DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); + + if (drop_writes) + DMEMIT("drop_writes "); + + if (fc->corrupt_bio_byte) + DMEMIT("corrupt_bio_byte %u %c %u %u ", + fc->corrupt_bio_byte, + (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r', + fc->corrupt_bio_value, fc->corrupt_bio_flags); + break; } return 0; @@ -177,11 +394,12 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, .map = flakey_map, + .end_io = flakey_end_io, .status = flakey_status, .ioctl = flakey_ioctl, .merge = flakey_merge, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 2067288f61f9..ad2eba40e319 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -38,6 +38,8 @@ struct io { struct dm_io_client *client; io_notify_fn callback; void *context; + void *vma_invalidate_address; + unsigned long vma_invalidate_size; } __attribute__((aligned(DM_IO_MAX_REGIONS))); static struct kmem_cache *_dm_io_cache; @@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error) set_bit(region, &io->error_bits); if (atomic_dec_and_test(&io->count)) { + if (io->vma_invalidate_size) + invalidate_kernel_vmap_range(io->vma_invalidate_address, + io->vma_invalidate_size); + if (io->sleeper) wake_up_process(io->sleeper); @@ -159,6 +165,9 @@ struct dpages { unsigned context_u; void *context_ptr; + + void *vma_invalidate_address; + unsigned long vma_invalidate_size; }; /* @@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->sleeper = current; io->client = client; + io->vma_invalidate_address = dp->vma_invalidate_address; + io->vma_invalidate_size = dp->vma_invalidate_size; + dispatch_io(rw, num_regions, where, dp, io, 1); while (1) { @@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->callback = fn; io->context = context; + io->vma_invalidate_address = dp->vma_invalidate_address; + io->vma_invalidate_size = dp->vma_invalidate_size; + dispatch_io(rw, num_regions, where, dp, io, 0); return 0; } -static int dp_init(struct dm_io_request *io_req, struct dpages *dp) +static int dp_init(struct dm_io_request *io_req, struct dpages *dp, + unsigned long size) { /* Set up dpages based on memory type */ + + dp->vma_invalidate_address = NULL; + dp->vma_invalidate_size = 0; + switch (io_req->mem.type) { case DM_IO_PAGE_LIST: list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); @@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) break; case DM_IO_VMA: + flush_kernel_vmap_range(io_req->mem.ptr.vma, size); + if ((io_req->bi_rw & RW_MASK) == READ) { + dp->vma_invalidate_address = io_req->mem.ptr.vma; + dp->vma_invalidate_size = size; + } vm_dp_init(dp, io_req->mem.ptr.vma); break; @@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, int r; struct dpages dp; - r = dp_init(io_req, &dp); + r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); if (r) return r; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 4cacdad2270a..2e9a3ca37bdd 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str) return NULL; } +static struct hash_cell *__get_dev_cell(uint64_t dev) +{ + struct mapped_device *md; + struct hash_cell *hc; + + md = dm_get_md(huge_decode_dev(dev)); + if (!md) + return NULL; + + hc = dm_get_mdptr(md); + if (!hc) { + dm_put(md); + return NULL; + } + + return hc; +} + /*----------------------------------------------------------------- * Inserting, removing and renaming a device. *---------------------------------------------------------------*/ @@ -718,25 +736,45 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) */ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { - struct mapped_device *md; - void *mdptr = NULL; + struct hash_cell *hc = NULL; - if (*param->uuid) - return __get_uuid_cell(param->uuid); + if (*param->uuid) { + if (*param->name || param->dev) + return NULL; - if (*param->name) - return __get_name_cell(param->name); + hc = __get_uuid_cell(param->uuid); + if (!hc) + return NULL; + } else if (*param->name) { + if (param->dev) + return NULL; - md = dm_get_md(huge_decode_dev(param->dev)); - if (!md) - goto out; + hc = __get_name_cell(param->name); + if (!hc) + return NULL; + } else if (param->dev) { + hc = __get_dev_cell(param->dev); + if (!hc) + return NULL; + } else + return NULL; - mdptr = dm_get_mdptr(md); - if (!mdptr) - dm_put(md); + /* + * Sneakily write in both the name and the uuid + * while we have the cell. + */ + strlcpy(param->name, hc->name, sizeof(param->name)); + if (hc->uuid) + strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); + else + param->uuid[0] = '\0'; -out: - return mdptr; + if (hc->new_map) + param->flags |= DM_INACTIVE_PRESENT_FLAG; + else + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; + + return hc; } static struct mapped_device *find_device(struct dm_ioctl *param) @@ -746,24 +784,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param) down_read(&_hash_lock); hc = __find_device_hash_cell(param); - if (hc) { + if (hc) md = hc->md; - - /* - * Sneakily write in both the name and the uuid - * while we have the cell. - */ - strlcpy(param->name, hc->name, sizeof(param->name)); - if (hc->uuid) - strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); - else - param->uuid[0] = '\0'; - - if (hc->new_map) - param->flags |= DM_INACTIVE_PRESENT_FLAG; - else - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; - } up_read(&_hash_lock); return md; @@ -1402,6 +1424,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out; } + if (!argc) { + DMWARN("Empty message received."); + goto out; + } + table = dm_get_live_table(md); if (!table) goto out_argv; diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 320401dec104..f82147029636 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -224,8 +224,6 @@ struct kcopyd_job { unsigned int num_dests; struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; - sector_t offset; - unsigned int nr_pages; struct page_list *pages; /* @@ -380,7 +378,7 @@ static int run_io_job(struct kcopyd_job *job) .bi_rw = job->rw, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, - .mem.offset = job->offset, + .mem.offset = 0, .notify.fn = complete_io, .notify.context = job, .client = job->kc->io_client, @@ -397,10 +395,9 @@ static int run_io_job(struct kcopyd_job *job) static int run_pages_job(struct kcopyd_job *job) { int r; + unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); - job->nr_pages = dm_div_up(job->dests[0].count + job->offset, - PAGE_SIZE >> 9); - r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); + r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); if (!r) { /* this job is ready for io */ push(&job->kc->io_jobs, job); @@ -602,8 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); - job->offset = 0; - job->nr_pages = 0; job->pages = NULL; job->fn = fn; @@ -622,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, } EXPORT_SYMBOL(dm_kcopyd_copy); +void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, + dm_kcopyd_notify_fn fn, void *context) +{ + struct kcopyd_job *job; + + job = mempool_alloc(kc->job_pool, GFP_NOIO); + + memset(job, 0, sizeof(struct kcopyd_job)); + job->kc = kc; + job->fn = fn; + job->context = context; + + atomic_inc(&kc->nr_jobs); + + return job; +} +EXPORT_SYMBOL(dm_kcopyd_prepare_callback); + +void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) +{ + struct kcopyd_job *job = j; + struct dm_kcopyd_client *kc = job->kc; + + job->read_err = read_err; + job->write_err = write_err; + + push(&kc->complete_jobs, job); + wake(kc); +} +EXPORT_SYMBOL(dm_kcopyd_do_callback); + /* * Cancels a kcopyd job, eg. someone might be deactivating a * mirror. diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index aa2e0c374ab3..1021c8986011 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list) group[count] = fe->region; count++; - list_del(&fe->list); - list_add(&fe->list, &tmp_list); + list_move(&fe->list, &tmp_list); type = fe->type; if (count >= MAX_FLUSH_GROUP_COUNT) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 948e3f4925bf..3b52bb72bd1f 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy); #define MIRROR_DISK_VERSION 2 #define LOG_OFFSET 2 -struct log_header { - uint32_t magic; +struct log_header_disk { + __le32 magic; /* * Simple, incrementing version. no backward * compatibility. */ + __le32 version; + __le64 nr_regions; +} __packed; + +struct log_header_core { + uint32_t magic; uint32_t version; - sector_t nr_regions; + uint64_t nr_regions; }; struct log_c { @@ -239,10 +245,10 @@ struct log_c { int log_dev_failed; int log_dev_flush_failed; struct dm_dev *log_dev; - struct log_header header; + struct log_header_core header; struct dm_io_region header_location; - struct log_header *disk_header; + struct log_header_disk *disk_header; }; /* @@ -251,34 +257,34 @@ struct log_c { */ static inline int log_test_bit(uint32_t *bs, unsigned bit) { - return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0; + return test_bit_le(bit, bs) ? 1 : 0; } static inline void log_set_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __test_and_set_bit_le(bit, (unsigned long *) bs); + __set_bit_le(bit, bs); l->touched_cleaned = 1; } static inline void log_clear_bit(struct log_c *l, uint32_t *bs, unsigned bit) { - __test_and_clear_bit_le(bit, (unsigned long *) bs); + __clear_bit_le(bit, bs); l->touched_dirtied = 1; } /*---------------------------------------------------------------- * Header IO *--------------------------------------------------------------*/ -static void header_to_disk(struct log_header *core, struct log_header *disk) +static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk) { disk->magic = cpu_to_le32(core->magic); disk->version = cpu_to_le32(core->version); disk->nr_regions = cpu_to_le64(core->nr_regions); } -static void header_from_disk(struct log_header *core, struct log_header *disk) +static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk) { core->magic = le32_to_cpu(disk->magic); core->version = le32_to_cpu(disk->version); @@ -486,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); lc->sync_count = (sync == NOSYNC) ? region_count : 0; - lc->recovering_bits = vmalloc(bitset_size); + lc->recovering_bits = vzalloc(bitset_size); if (!lc->recovering_bits) { DMWARN("couldn't allocate sync bitset"); vfree(lc->sync_bits); @@ -498,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, kfree(lc); return -ENOMEM; } - memset(lc->recovering_bits, 0, bitset_size); lc->sync_search = 0; log->context = lc; @@ -739,8 +744,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) return 0; do { - *region = find_next_zero_bit_le( - (unsigned long *) lc->sync_bits, + *region = find_next_zero_bit_le(lc->sync_bits, lc->region_count, lc->sync_search); lc->sync_search = *region + 1; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c3547016f0f1..5e0090ef4182 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -22,7 +22,6 @@ #include <linux/atomic.h> #define DM_MSG_PREFIX "multipath" -#define MESG_STR(x) x, sizeof(x) #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) @@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work) * <#paths> <#per-path selector args> * [<path> [<arg>]* ]+ ]+ *---------------------------------------------------------------*/ -struct param { - unsigned min; - unsigned max; - char *error; -}; - -static int read_param(struct param *param, char *str, unsigned *v, char **error) -{ - if (!str || - (sscanf(str, "%u", v) != 1) || - (*v < param->min) || - (*v > param->max)) { - *error = param->error; - return -EINVAL; - } - - return 0; -} - -struct arg_set { - unsigned argc; - char **argv; -}; - -static char *shift(struct arg_set *as) -{ - char *r; - - if (as->argc) { - as->argc--; - r = *as->argv; - as->argv++; - return r; - } - - return NULL; -} - -static void consume(struct arg_set *as, unsigned n) -{ - BUG_ON (as->argc < n); - as->argc -= n; - as->argv += n; -} - -static int parse_path_selector(struct arg_set *as, struct priority_group *pg, +static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, struct dm_target *ti) { int r; struct path_selector_type *pst; unsigned ps_argc; - static struct param _params[] = { + static struct dm_arg _args[] = { {0, 1024, "invalid number of path selector args"}, }; - pst = dm_get_path_selector(shift(as)); + pst = dm_get_path_selector(dm_shift_arg(as)); if (!pst) { ti->error = "unknown path selector type"; return -EINVAL; } - r = read_param(_params, shift(as), &ps_argc, &ti->error); + r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); if (r) { dm_put_path_selector(pst); return -EINVAL; } - if (ps_argc > as->argc) { - dm_put_path_selector(pst); - ti->error = "not enough arguments for path selector"; - return -EINVAL; - } - r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { dm_put_path_selector(pst); @@ -587,12 +535,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg, } pg->ps.type = pst; - consume(as, ps_argc); + dm_consume_args(as, ps_argc); return 0; } -static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, +static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, struct dm_target *ti) { int r; @@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), + r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), &p->path.dev); if (r) { ti->error = "error getting device"; @@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, return ERR_PTR(r); } -static struct priority_group *parse_priority_group(struct arg_set *as, +static struct priority_group *parse_priority_group(struct dm_arg_set *as, struct multipath *m) { - static struct param _params[] = { + static struct dm_arg _args[] = { {1, 1024, "invalid number of paths"}, {0, 1024, "invalid number of selector args"} }; int r; - unsigned i, nr_selector_args, nr_params; + unsigned i, nr_selector_args, nr_args; struct priority_group *pg; struct dm_target *ti = m->ti; @@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as, /* * read the paths */ - r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error); + r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); if (r) goto bad; - r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error); + r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); if (r) goto bad; - nr_params = 1 + nr_selector_args; + nr_args = 1 + nr_selector_args; for (i = 0; i < pg->nr_pgpaths; i++) { struct pgpath *pgpath; - struct arg_set path_args; + struct dm_arg_set path_args; - if (as->argc < nr_params) { + if (as->argc < nr_args) { ti->error = "not enough path parameters"; r = -EINVAL; goto bad; } - path_args.argc = nr_params; + path_args.argc = nr_args; path_args.argv = as->argv; pgpath = parse_path(&path_args, &pg->ps, ti); @@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as, pgpath->pg = pg; list_add_tail(&pgpath->list, &pg->pgpaths); - consume(as, nr_params); + dm_consume_args(as, nr_args); } return pg; @@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as, return ERR_PTR(r); } -static int parse_hw_handler(struct arg_set *as, struct multipath *m) +static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) { unsigned hw_argc; int ret; struct dm_target *ti = m->ti; - static struct param _params[] = { + static struct dm_arg _args[] = { {0, 1024, "invalid number of hardware handler args"}, }; - if (read_param(_params, shift(as), &hw_argc, &ti->error)) + if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) return -EINVAL; if (!hw_argc) return 0; - if (hw_argc > as->argc) { - ti->error = "not enough arguments for hardware handler"; - return -EINVAL; - } - - m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); + m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); request_module("scsi_dh_%s", m->hw_handler_name); if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { ti->error = "unknown hardware handler type"; @@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m) for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) j = sprintf(p, "%s", as->argv[i]); } - consume(as, hw_argc - 1); + dm_consume_args(as, hw_argc - 1); return 0; fail: @@ -787,20 +730,20 @@ fail: return ret; } -static int parse_features(struct arg_set *as, struct multipath *m) +static int parse_features(struct dm_arg_set *as, struct multipath *m) { int r; unsigned argc; struct dm_target *ti = m->ti; - const char *param_name; + const char *arg_name; - static struct param _params[] = { + static struct dm_arg _args[] = { {0, 5, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; - r = read_param(_params, shift(as), &argc, &ti->error); + r = dm_read_arg_group(_args, as, &argc, &ti->error); if (r) return -EINVAL; @@ -808,26 +751,24 @@ static int parse_features(struct arg_set *as, struct multipath *m) return 0; do { - param_name = shift(as); + arg_name = dm_shift_arg(as); argc--; - if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { + if (!strcasecmp(arg_name, "queue_if_no_path")) { r = queue_if_no_path(m, 1, 0); continue; } - if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && + if (!strcasecmp(arg_name, "pg_init_retries") && (argc >= 1)) { - r = read_param(_params + 1, shift(as), - &m->pg_init_retries, &ti->error); + r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); argc--; continue; } - if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) && + if (!strcasecmp(arg_name, "pg_init_delay_msecs") && (argc >= 1)) { - r = read_param(_params + 2, shift(as), - &m->pg_init_delay_msecs, &ti->error); + r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); argc--; continue; } @@ -842,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m) static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - /* target parameters */ - static struct param _params[] = { + /* target arguments */ + static struct dm_arg _args[] = { {0, 1024, "invalid number of priority groups"}, {0, 1024, "invalid initial priority group number"}, }; int r; struct multipath *m; - struct arg_set as; + struct dm_arg_set as; unsigned pg_count = 0; unsigned next_pg_num; @@ -871,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, if (r) goto bad; - r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); + r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); if (r) goto bad; - r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); + r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); if (r) goto bad; @@ -1505,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) } if (argc == 1) { - if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { + if (!strcasecmp(argv[0], "queue_if_no_path")) { r = queue_if_no_path(m, 1, 0); goto out; - } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { + } else if (!strcasecmp(argv[0], "fail_if_no_path")) { r = queue_if_no_path(m, 0, 0); goto out; } @@ -1519,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - if (!strnicmp(argv[0], MESG_STR("disable_group"))) { + if (!strcasecmp(argv[0], "disable_group")) { r = bypass_pg_num(m, argv[1], 1); goto out; - } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { + } else if (!strcasecmp(argv[0], "enable_group")) { r = bypass_pg_num(m, argv[1], 0); goto out; - } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { + } else if (!strcasecmp(argv[0], "switch_group")) { r = switch_pg_num(m, argv[1]); goto out; - } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) + } else if (!strcasecmp(argv[0], "reinstate_path")) action = reinstate_path; - else if (!strnicmp(argv[0], MESG_STR("fail_path"))) + else if (!strcasecmp(argv[0], "fail_path")) action = fail_path; else { DMWARN("Unrecognised multipath message received."); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index e5d8904fc8f6..a002dd85db1e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -8,19 +8,19 @@ #include <linux/slab.h> #include "md.h" +#include "raid1.h" #include "raid5.h" -#include "dm.h" #include "bitmap.h" +#include <linux/device-mapper.h> + #define DM_MSG_PREFIX "raid" /* - * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then - * make it so the flag doesn't set anything. + * The following flags are used by dm-raid.c to set up the array state. + * They must be cleared before md_run is called. */ -#ifndef MD_SYNC_STATE_FORCED -#define MD_SYNC_STATE_FORCED 0 -#endif +#define FirstUse 10 /* rdev flag */ struct raid_dev { /* @@ -43,14 +43,15 @@ struct raid_dev { /* * Flags for rs->print_flags field. */ -#define DMPF_DAEMON_SLEEP 0x1 -#define DMPF_MAX_WRITE_BEHIND 0x2 -#define DMPF_SYNC 0x4 -#define DMPF_NOSYNC 0x8 -#define DMPF_STRIPE_CACHE 0x10 -#define DMPF_MIN_RECOVERY_RATE 0x20 -#define DMPF_MAX_RECOVERY_RATE 0x40 - +#define DMPF_SYNC 0x1 +#define DMPF_NOSYNC 0x2 +#define DMPF_REBUILD 0x4 +#define DMPF_DAEMON_SLEEP 0x8 +#define DMPF_MIN_RECOVERY_RATE 0x10 +#define DMPF_MAX_RECOVERY_RATE 0x20 +#define DMPF_MAX_WRITE_BEHIND 0x40 +#define DMPF_STRIPE_CACHE 0x80 +#define DMPF_REGION_SIZE 0X100 struct raid_set { struct dm_target *ti; @@ -72,6 +73,7 @@ static struct raid_type { const unsigned level; /* RAID level. */ const unsigned algorithm; /* RAID algorithm. */ } raid_types[] = { + {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, @@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra } sectors_per_dev = ti->len; - if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { + if ((raid_type->level > 1) && + sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { ti->error = "Target length not divisible by number of data devices"; return ERR_PTR(-EINVAL); } @@ -147,9 +150,16 @@ static void context_free(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) + for (i = 0; i < rs->md.raid_disks; i++) { + if (rs->dev[i].meta_dev) + dm_put_device(rs->ti, rs->dev[i].meta_dev); + if (rs->dev[i].rdev.sb_page) + put_page(rs->dev[i].rdev.sb_page); + rs->dev[i].rdev.sb_page = NULL; + rs->dev[i].rdev.sb_loaded = 0; if (rs->dev[i].data_dev) dm_put_device(rs->ti, rs->dev[i].data_dev); + } kfree(rs); } @@ -159,7 +169,16 @@ static void context_free(struct raid_set *rs) * <meta_dev>: meta device name or '-' if missing * <data_dev>: data device name or '-' if missing * - * This code parses those words. + * The following are permitted: + * - - + * - <data_dev> + * <meta_dev> <data_dev> + * + * The following is not allowed: + * <meta_dev> - + * + * This code parses those words. If there is a failure, + * the caller must use context_free to unwind the operations. */ static int dev_parms(struct raid_set *rs, char **argv) { @@ -182,8 +201,16 @@ static int dev_parms(struct raid_set *rs, char **argv) rs->dev[i].rdev.mddev = &rs->md; if (strcmp(argv[0], "-")) { - rs->ti->error = "Metadata devices not supported"; - return -EINVAL; + ret = dm_get_device(rs->ti, argv[0], + dm_table_get_mode(rs->ti->table), + &rs->dev[i].meta_dev); + rs->ti->error = "RAID metadata device lookup failure"; + if (ret) + return ret; + + rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); + if (!rs->dev[i].rdev.sb_page) + return -ENOMEM; } if (!strcmp(argv[1], "-")) { @@ -193,6 +220,10 @@ static int dev_parms(struct raid_set *rs, char **argv) return -EINVAL; } + rs->ti->error = "No data device supplied with metadata device"; + if (rs->dev[i].meta_dev) + return -EINVAL; + continue; } @@ -204,6 +235,10 @@ static int dev_parms(struct raid_set *rs, char **argv) return ret; } + if (rs->dev[i].meta_dev) { + metadata_available = 1; + rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; + } rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) @@ -235,33 +270,109 @@ static int dev_parms(struct raid_set *rs, char **argv) } /* + * validate_region_size + * @rs + * @region_size: region size in sectors. If 0, pick a size (4MiB default). + * + * Set rs->md.bitmap_info.chunksize (which really refers to 'region size'). + * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap. + * + * Returns: 0 on success, -EINVAL on failure. + */ +static int validate_region_size(struct raid_set *rs, unsigned long region_size) +{ + unsigned long min_region_size = rs->ti->len / (1 << 21); + + if (!region_size) { + /* + * Choose a reasonable default. All figures in sectors. + */ + if (min_region_size > (1 << 13)) { + DMINFO("Choosing default region size of %lu sectors", + region_size); + region_size = min_region_size; + } else { + DMINFO("Choosing default region size of 4MiB"); + region_size = 1 << 13; /* sectors */ + } + } else { + /* + * Validate user-supplied value. + */ + if (region_size > rs->ti->len) { + rs->ti->error = "Supplied region size is too large"; + return -EINVAL; + } + + if (region_size < min_region_size) { + DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", + region_size, min_region_size); + rs->ti->error = "Supplied region size is too small"; + return -EINVAL; + } + + if (!is_power_of_2(region_size)) { + rs->ti->error = "Region size is not a power of 2"; + return -EINVAL; + } + + if (region_size < rs->md.chunk_sectors) { + rs->ti->error = "Region size is smaller than the chunk size"; + return -EINVAL; + } + } + + /* + * Convert sectors to bytes. + */ + rs->md.bitmap_info.chunksize = (region_size << 9); + + return 0; +} + +/* * Possible arguments are... - * RAID456: * <chunk_size> [optional_args] * - * Optional args: - * [[no]sync] Force or prevent recovery of the entire array + * Argument definitions + * <chunk_size> The number of sectors per disk that + * will form the "stripe" + * [[no]sync] Force or prevent recovery of the + * entire array * [rebuild <idx>] Rebuild the drive indicated by the index - * [daemon_sleep <ms>] Time between bitmap daemon work to clear bits + * [daemon_sleep <ms>] Time between bitmap daemon work to + * clear bits * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization + * [write_mostly <idx>] Indicate a write mostly drive via index * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) * [stripe_cache <sectors>] Stripe cache size for higher RAIDs + * [region_size <sectors>] Defines granularity of bitmap */ static int parse_raid_params(struct raid_set *rs, char **argv, unsigned num_raid_params) { unsigned i, rebuild_cnt = 0; - unsigned long value; + unsigned long value, region_size = 0; char *key; /* * First, parse the in-order required arguments + * "chunk_size" is the only argument of this type. */ - if ((strict_strtoul(argv[0], 10, &value) < 0) || - !is_power_of_2(value) || (value < 8)) { + if ((strict_strtoul(argv[0], 10, &value) < 0)) { rs->ti->error = "Bad chunk size"; return -EINVAL; + } else if (rs->raid_type->level == 1) { + if (value) + DMERR("Ignoring chunk size parameter for RAID 1"); + value = 0; + } else if (!is_power_of_2(value)) { + rs->ti->error = "Chunk size must be a power of 2"; + return -EINVAL; + } else if (value < 8) { + rs->ti->error = "Chunk size value is too small"; + return -EINVAL; } rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; @@ -269,22 +380,39 @@ static int parse_raid_params(struct raid_set *rs, char **argv, num_raid_params--; /* - * Second, parse the unordered optional arguments + * We set each individual device as In_sync with a completed + * 'recovery_offset'. If there has been a device failure or + * replacement then one of the following cases applies: + * + * 1) User specifies 'rebuild'. + * - Device is reset when param is read. + * 2) A new device is supplied. + * - No matching superblock found, resets device. + * 3) Device failure was transient and returns on reload. + * - Failure noticed, resets device for bitmap replay. + * 4) Device hadn't completed recovery after previous failure. + * - Superblock is read and overrides recovery_offset. + * + * What is found in the superblocks of the devices is always + * authoritative, unless 'rebuild' or '[no]sync' was specified. */ - for (i = 0; i < rs->md.raid_disks; i++) + for (i = 0; i < rs->md.raid_disks; i++) { set_bit(In_sync, &rs->dev[i].rdev.flags); + rs->dev[i].rdev.recovery_offset = MaxSector; + } + /* + * Second, parse the unordered optional arguments + */ for (i = 0; i < num_raid_params; i++) { - if (!strcmp(argv[i], "nosync")) { + if (!strcasecmp(argv[i], "nosync")) { rs->md.recovery_cp = MaxSector; rs->print_flags |= DMPF_NOSYNC; - rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } - if (!strcmp(argv[i], "sync")) { + if (!strcasecmp(argv[i], "sync")) { rs->md.recovery_cp = 0; rs->print_flags |= DMPF_SYNC; - rs->md.flags |= MD_SYNC_STATE_FORCED; continue; } @@ -300,9 +428,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } - if (!strcmp(key, "rebuild")) { - if (++rebuild_cnt > rs->raid_type->parity_devs) { - rs->ti->error = "Too many rebuild drives given"; + if (!strcasecmp(key, "rebuild")) { + rebuild_cnt++; + if (((rs->raid_type->level != 1) && + (rebuild_cnt > rs->raid_type->parity_devs)) || + ((rs->raid_type->level == 1) && + (rebuild_cnt > (rs->md.raid_disks - 1)))) { + rs->ti->error = "Too many rebuild devices specified for given RAID type"; return -EINVAL; } if (value > rs->md.raid_disks) { @@ -311,7 +443,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } clear_bit(In_sync, &rs->dev[value].rdev.flags); rs->dev[value].rdev.recovery_offset = 0; - } else if (!strcmp(key, "max_write_behind")) { + rs->print_flags |= DMPF_REBUILD; + } else if (!strcasecmp(key, "write_mostly")) { + if (rs->raid_type->level != 1) { + rs->ti->error = "write_mostly option is only valid for RAID1"; + return -EINVAL; + } + if (value > rs->md.raid_disks) { + rs->ti->error = "Invalid write_mostly drive index given"; + return -EINVAL; + } + set_bit(WriteMostly, &rs->dev[value].rdev.flags); + } else if (!strcasecmp(key, "max_write_behind")) { + if (rs->raid_type->level != 1) { + rs->ti->error = "max_write_behind option is only valid for RAID1"; + return -EINVAL; + } rs->print_flags |= DMPF_MAX_WRITE_BEHIND; /* @@ -324,14 +471,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; } rs->md.bitmap_info.max_write_behind = value; - } else if (!strcmp(key, "daemon_sleep")) { + } else if (!strcasecmp(key, "daemon_sleep")) { rs->print_flags |= DMPF_DAEMON_SLEEP; if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { rs->ti->error = "daemon sleep period out of range"; return -EINVAL; } rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcmp(key, "stripe_cache")) { + } else if (!strcasecmp(key, "stripe_cache")) { rs->print_flags |= DMPF_STRIPE_CACHE; /* @@ -348,20 +495,23 @@ static int parse_raid_params(struct raid_set *rs, char **argv, rs->ti->error = "Bad stripe_cache size"; return -EINVAL; } - } else if (!strcmp(key, "min_recovery_rate")) { + } else if (!strcasecmp(key, "min_recovery_rate")) { rs->print_flags |= DMPF_MIN_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "min_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_min = (int)value; - } else if (!strcmp(key, "max_recovery_rate")) { + } else if (!strcasecmp(key, "max_recovery_rate")) { rs->print_flags |= DMPF_MAX_RECOVERY_RATE; if (value > INT_MAX) { rs->ti->error = "max_recovery_rate out of range"; return -EINVAL; } rs->md.sync_speed_max = (int)value; + } else if (!strcasecmp(key, "region_size")) { + rs->print_flags |= DMPF_REGION_SIZE; + region_size = value; } else { DMERR("Unable to parse RAID parameter: %s", key); rs->ti->error = "Unable to parse RAID parameters"; @@ -369,6 +519,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv, } } + if (validate_region_size(rs, region_size)) + return -EINVAL; + + if (rs->md.chunk_sectors) + rs->ti->split_io = rs->md.chunk_sectors; + else + rs->ti->split_io = region_size; + + if (rs->md.chunk_sectors) + rs->ti->split_io = rs->md.chunk_sectors; + else + rs->ti->split_io = region_size; + /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; @@ -387,17 +550,351 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) { struct raid_set *rs = container_of(cb, struct raid_set, callbacks); + if (rs->raid_type->level == 1) + return md_raid1_congested(&rs->md, bits); + return md_raid5_congested(&rs->md, bits); } /* + * This structure is never routinely used by userspace, unlike md superblocks. + * Devices with this superblock should only ever be accessed via device-mapper. + */ +#define DM_RAID_MAGIC 0x64526D44 +struct dm_raid_superblock { + __le32 magic; /* "DmRd" */ + __le32 features; /* Used to indicate possible future changes */ + + __le32 num_devices; /* Number of devices in this array. (Max 64) */ + __le32 array_position; /* The position of this drive in the array */ + + __le64 events; /* Incremented by md when superblock updated */ + __le64 failed_devices; /* Bit field of devices to indicate failures */ + + /* + * This offset tracks the progress of the repair or replacement of + * an individual drive. + */ + __le64 disk_recovery_offset; + + /* + * This offset tracks the progress of the initial array + * synchronisation/parity calculation. + */ + __le64 array_resync_offset; + + /* + * RAID characteristics + */ + __le32 level; + __le32 layout; + __le32 stripe_sectors; + + __u8 pad[452]; /* Round struct to 512 bytes. */ + /* Always set to 0 when writing. */ +} __packed; + +static int read_disk_sb(mdk_rdev_t *rdev, int size) +{ + BUG_ON(!rdev->sb_page); + + if (rdev->sb_loaded) + return 0; + + if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { + DMERR("Failed to read device superblock"); + return -EINVAL; + } + + rdev->sb_loaded = 1; + + return 0; +} + +static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev) +{ + mdk_rdev_t *r, *t; + uint64_t failed_devices; + struct dm_raid_superblock *sb; + + sb = page_address(rdev->sb_page); + failed_devices = le64_to_cpu(sb->failed_devices); + + rdev_for_each(r, t, mddev) + if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) + failed_devices |= (1ULL << r->raid_disk); + + memset(sb, 0, sizeof(*sb)); + + sb->magic = cpu_to_le32(DM_RAID_MAGIC); + sb->features = cpu_to_le32(0); /* No features yet */ + + sb->num_devices = cpu_to_le32(mddev->raid_disks); + sb->array_position = cpu_to_le32(rdev->raid_disk); + + sb->events = cpu_to_le64(mddev->events); + sb->failed_devices = cpu_to_le64(failed_devices); + + sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); + sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); + + sb->level = cpu_to_le32(mddev->level); + sb->layout = cpu_to_le32(mddev->layout); + sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); +} + +/* + * super_load + * + * This function creates a superblock if one is not found on the device + * and will decide which superblock to use if there's a choice. + * + * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise + */ +static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev) +{ + int ret; + struct dm_raid_superblock *sb; + struct dm_raid_superblock *refsb; + uint64_t events_sb, events_refsb; + + rdev->sb_start = 0; + rdev->sb_size = sizeof(*sb); + + ret = read_disk_sb(rdev, rdev->sb_size); + if (ret) + return ret; + + sb = page_address(rdev->sb_page); + if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) { + super_sync(rdev->mddev, rdev); + + set_bit(FirstUse, &rdev->flags); + + /* Force writing of superblocks to disk */ + set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); + + /* Any superblock is better than none, choose that if given */ + return refdev ? 0 : 1; + } + + if (!refdev) + return 1; + + events_sb = le64_to_cpu(sb->events); + + refsb = page_address(refdev->sb_page); + events_refsb = le64_to_cpu(refsb->events); + + return (events_sb > events_refsb) ? 1 : 0; +} + +static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev) +{ + int role; + struct raid_set *rs = container_of(mddev, struct raid_set, md); + uint64_t events_sb; + uint64_t failed_devices; + struct dm_raid_superblock *sb; + uint32_t new_devs = 0; + uint32_t rebuilds = 0; + mdk_rdev_t *r, *t; + struct dm_raid_superblock *sb2; + + sb = page_address(rdev->sb_page); + events_sb = le64_to_cpu(sb->events); + failed_devices = le64_to_cpu(sb->failed_devices); + + /* + * Initialise to 1 if this is a new superblock. + */ + mddev->events = events_sb ? : 1; + + /* + * Reshaping is not currently allowed + */ + if ((le32_to_cpu(sb->level) != mddev->level) || + (le32_to_cpu(sb->layout) != mddev->layout) || + (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { + DMERR("Reshaping arrays not yet supported."); + return -EINVAL; + } + + /* We can only change the number of devices in RAID1 right now */ + if ((rs->raid_type->level != 1) && + (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { + DMERR("Reshaping arrays not yet supported."); + return -EINVAL; + } + + if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))) + mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); + + /* + * During load, we set FirstUse if a new superblock was written. + * There are two reasons we might not have a superblock: + * 1) The array is brand new - in which case, all of the + * devices must have their In_sync bit set. Also, + * recovery_cp must be 0, unless forced. + * 2) This is a new device being added to an old array + * and the new device needs to be rebuilt - in which + * case the In_sync bit will /not/ be set and + * recovery_cp must be MaxSector. + */ + rdev_for_each(r, t, mddev) { + if (!test_bit(In_sync, &r->flags)) { + if (!test_bit(FirstUse, &r->flags)) + DMERR("Superblock area of " + "rebuild device %d should have been " + "cleared.", r->raid_disk); + set_bit(FirstUse, &r->flags); + rebuilds++; + } else if (test_bit(FirstUse, &r->flags)) + new_devs++; + } + + if (!rebuilds) { + if (new_devs == mddev->raid_disks) { + DMINFO("Superblocks created for new array"); + set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); + } else if (new_devs) { + DMERR("New device injected " + "into existing array without 'rebuild' " + "parameter specified"); + return -EINVAL; + } + } else if (new_devs) { + DMERR("'rebuild' devices cannot be " + "injected into an array with other first-time devices"); + return -EINVAL; + } else if (mddev->recovery_cp != MaxSector) { + DMERR("'rebuild' specified while array is not in-sync"); + return -EINVAL; + } + + /* + * Now we set the Faulty bit for those devices that are + * recorded in the superblock as failed. + */ + rdev_for_each(r, t, mddev) { + if (!r->sb_page) + continue; + sb2 = page_address(r->sb_page); + sb2->failed_devices = 0; + + /* + * Check for any device re-ordering. + */ + if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { + role = le32_to_cpu(sb2->array_position); + if (role != r->raid_disk) { + if (rs->raid_type->level != 1) { + rs->ti->error = "Cannot change device " + "positions in RAID array"; + return -EINVAL; + } + DMINFO("RAID1 device #%d now at position #%d", + role, r->raid_disk); + } + + /* + * Partial recovery is performed on + * returning failed devices. + */ + if (failed_devices & (1 << role)) + set_bit(Faulty, &r->flags); + } + } + + return 0; +} + +static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev) +{ + struct dm_raid_superblock *sb = page_address(rdev->sb_page); + + /* + * If mddev->events is not set, we know we have not yet initialized + * the array. + */ + if (!mddev->events && super_init_validation(mddev, rdev)) + return -EINVAL; + + mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */ + rdev->mddev->bitmap_info.default_offset = 4096 >> 9; + if (!test_bit(FirstUse, &rdev->flags)) { + rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); + if (rdev->recovery_offset != MaxSector) + clear_bit(In_sync, &rdev->flags); + } + + /* + * If a device comes back, set it as not In_sync and no longer faulty. + */ + if (test_bit(Faulty, &rdev->flags)) { + clear_bit(Faulty, &rdev->flags); + clear_bit(In_sync, &rdev->flags); + rdev->saved_raid_disk = rdev->raid_disk; + rdev->recovery_offset = 0; + } + + clear_bit(FirstUse, &rdev->flags); + + return 0; +} + +/* + * Analyse superblocks and select the freshest. + */ +static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) +{ + int ret; + mdk_rdev_t *rdev, *freshest, *tmp; + mddev_t *mddev = &rs->md; + + freshest = NULL; + rdev_for_each(rdev, tmp, mddev) { + if (!rdev->meta_bdev) + continue; + + ret = super_load(rdev, freshest); + + switch (ret) { + case 1: + freshest = rdev; + break; + case 0: + break; + default: + ti->error = "Failed to load superblock"; + return ret; + } + } + + if (!freshest) + return 0; + + /* + * Validation of the freshest device provides the source of + * validation for the remaining devices. + */ + ti->error = "Unable to assemble array: Invalid superblocks"; + if (super_validate(mddev, freshest)) + return -EINVAL; + + rdev_for_each(rdev, tmp, mddev) + if ((rdev != freshest) && super_validate(mddev, rdev)) + return -EINVAL; + + return 0; +} + +/* * Construct a RAID4/5/6 mapping: * Args: * <raid_type> <#raid_params> <raid_params> \ * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } * - * ** metadata devices are not supported yet, use '-' instead ** - * * <raid_params> varies by <raid_type>. See 'parse_raid_params' for * details on possible <raid_params>. */ @@ -465,8 +962,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) if (ret) goto bad; + rs->md.sync_super = super_sync; + ret = analyse_superblocks(ti, rs); + if (ret) + goto bad; + INIT_WORK(&rs->md.event_work, do_table_event); - ti->split_io = rs->md.chunk_sectors; ti->private = rs; mutex_lock(&rs->md.reconfig_mutex); @@ -482,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) rs->callbacks.congested_fn = raid_is_congested; dm_table_add_target_callbacks(ti->table, &rs->callbacks); + mddev_suspend(&rs->md); return 0; bad: @@ -546,12 +1048,17 @@ static int raid_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: /* The string you would use to construct this array */ - for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && + for (i = 0; i < rs->md.raid_disks; i++) { + if ((rs->print_flags & DMPF_REBUILD) && + rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) - raid_param_cnt++; /* for rebuilds */ + raid_param_cnt += 2; /* for rebuilds */ + if (rs->dev[i].data_dev && + test_bit(WriteMostly, &rs->dev[i].rdev.flags)) + raid_param_cnt += 2; + } - raid_param_cnt += (hweight64(rs->print_flags) * 2); + raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2); if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) raid_param_cnt--; @@ -565,7 +1072,8 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" nosync"); for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && + if ((rs->print_flags & DMPF_REBUILD) && + rs->dev[i].data_dev && !test_bit(In_sync, &rs->dev[i].rdev.flags)) DMEMIT(" rebuild %u", i); @@ -579,6 +1087,11 @@ static int raid_status(struct dm_target *ti, status_type_t type, if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); + for (i = 0; i < rs->md.raid_disks; i++) + if (rs->dev[i].data_dev && + test_bit(WriteMostly, &rs->dev[i].rdev.flags)) + DMEMIT(" write_mostly %u", i); + if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) DMEMIT(" max_write_behind %lu", rs->md.bitmap_info.max_write_behind); @@ -591,9 +1104,16 @@ static int raid_status(struct dm_target *ti, status_type_t type, conf ? conf->max_nr_stripes * 2 : 0); } + if (rs->print_flags & DMPF_REGION_SIZE) + DMEMIT(" region_size %lu", + rs->md.bitmap_info.chunksize >> 9); + DMEMIT(" %d", rs->md.raid_disks); for (i = 0; i < rs->md.raid_disks; i++) { - DMEMIT(" -"); /* metadata device */ + if (rs->dev[i].meta_dev) + DMEMIT(" %s", rs->dev[i].meta_dev->name); + else + DMEMIT(" -"); if (rs->dev[i].data_dev) DMEMIT(" %s", rs->dev[i].data_dev->name); @@ -650,12 +1170,13 @@ static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; + bitmap_load(&rs->md); mddev_resume(&rs->md); } static struct target_type raid_target = { .name = "raid", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 135c2f1fdbfc..d1f1d7017103 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -58,25 +58,30 @@ #define NUM_SNAPSHOT_HDR_CHUNKS 1 struct disk_header { - uint32_t magic; + __le32 magic; /* * Is this snapshot valid. There is no way of recovering * an invalid snapshot. */ - uint32_t valid; + __le32 valid; /* * Simple, incrementing version. no backward * compatibility. */ - uint32_t version; + __le32 version; /* In sectors */ - uint32_t chunk_size; -}; + __le32 chunk_size; +} __packed; struct disk_exception { + __le64 old_chunk; + __le64 new_chunk; +} __packed; + +struct core_exception { uint64_t old_chunk; uint64_t new_chunk; }; @@ -169,10 +174,9 @@ static int alloc_area(struct pstore *ps) if (!ps->area) goto err_area; - ps->zero_area = vmalloc(len); + ps->zero_area = vzalloc(len); if (!ps->zero_area) goto err_zero_area; - memset(ps->zero_area, 0, len); ps->header_area = vmalloc(len); if (!ps->header_area) @@ -396,32 +400,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) } static void read_exception(struct pstore *ps, - uint32_t index, struct disk_exception *result) + uint32_t index, struct core_exception *result) { - struct disk_exception *e = get_exception(ps, index); + struct disk_exception *de = get_exception(ps, index); /* copy it */ - result->old_chunk = le64_to_cpu(e->old_chunk); - result->new_chunk = le64_to_cpu(e->new_chunk); + result->old_chunk = le64_to_cpu(de->old_chunk); + result->new_chunk = le64_to_cpu(de->new_chunk); } static void write_exception(struct pstore *ps, - uint32_t index, struct disk_exception *de) + uint32_t index, struct core_exception *e) { - struct disk_exception *e = get_exception(ps, index); + struct disk_exception *de = get_exception(ps, index); /* copy it */ - e->old_chunk = cpu_to_le64(de->old_chunk); - e->new_chunk = cpu_to_le64(de->new_chunk); + de->old_chunk = cpu_to_le64(e->old_chunk); + de->new_chunk = cpu_to_le64(e->new_chunk); } static void clear_exception(struct pstore *ps, uint32_t index) { - struct disk_exception *e = get_exception(ps, index); + struct disk_exception *de = get_exception(ps, index); /* clear it */ - e->old_chunk = 0; - e->new_chunk = 0; + de->old_chunk = 0; + de->new_chunk = 0; } /* @@ -437,13 +441,13 @@ static int insert_exceptions(struct pstore *ps, { int r; unsigned int i; - struct disk_exception de; + struct core_exception e; /* presume the area is full */ *full = 1; for (i = 0; i < ps->exceptions_per_area; i++) { - read_exception(ps, i, &de); + read_exception(ps, i, &e); /* * If the new_chunk is pointing at the start of @@ -451,7 +455,7 @@ static int insert_exceptions(struct pstore *ps, * is we know that we've hit the end of the * exceptions. Therefore the area is not full. */ - if (de.new_chunk == 0LL) { + if (e.new_chunk == 0LL) { ps->current_committed = i; *full = 0; break; @@ -460,13 +464,13 @@ static int insert_exceptions(struct pstore *ps, /* * Keep track of the start of the free chunks. */ - if (ps->next_free <= de.new_chunk) - ps->next_free = de.new_chunk + 1; + if (ps->next_free <= e.new_chunk) + ps->next_free = e.new_chunk + 1; /* * Otherwise we add the exception to the snapshot. */ - r = callback(callback_context, de.old_chunk, de.new_chunk); + r = callback(callback_context, e.old_chunk, e.new_chunk); if (r) return r; } @@ -563,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store, ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / sizeof(struct disk_exception); ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); + sizeof(*ps->callbacks)); if (!ps->callbacks) return -ENOMEM; @@ -641,12 +645,12 @@ static void persistent_commit_exception(struct dm_exception_store *store, { unsigned int i; struct pstore *ps = get_info(store); - struct disk_exception de; + struct core_exception ce; struct commit_callback *cb; - de.old_chunk = e->old_chunk; - de.new_chunk = e->new_chunk; - write_exception(ps, ps->current_committed++, &de); + ce.old_chunk = e->old_chunk; + ce.new_chunk = e->new_chunk; + write_exception(ps, ps->current_committed++, &ce); /* * Add the callback to the back of the array. This code @@ -670,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, * If we completely filled the current area, then wipe the next one. */ if ((ps->current_committed == ps->exceptions_per_area) && - zero_disk_area(ps, ps->current_area + 1)) + zero_disk_area(ps, ps->current_area + 1)) ps->valid = 0; /* @@ -701,7 +705,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, chunk_t *last_new_chunk) { struct pstore *ps = get_info(store); - struct disk_exception de; + struct core_exception ce; int nr_consecutive; int r; @@ -722,9 +726,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, ps->current_committed = ps->exceptions_per_area; } - read_exception(ps, ps->current_committed - 1, &de); - *last_old_chunk = de.old_chunk; - *last_new_chunk = de.new_chunk; + read_exception(ps, ps->current_committed - 1, &ce); + *last_old_chunk = ce.old_chunk; + *last_new_chunk = ce.new_chunk; /* * Find number of consecutive chunks within the current area, @@ -733,9 +737,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store, for (nr_consecutive = 1; nr_consecutive < ps->current_committed; nr_consecutive++) { read_exception(ps, ps->current_committed - 1 - nr_consecutive, - &de); - if (de.old_chunk != *last_old_chunk - nr_consecutive || - de.new_chunk != *last_new_chunk - nr_consecutive) + &ce); + if (ce.old_chunk != *last_old_chunk - nr_consecutive || + ce.new_chunk != *last_new_chunk - nr_consecutive) break; } @@ -753,7 +757,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, WRITE); + r = area_io(ps, WRITE_FLUSH_FUA); if (r < 0) return r; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9ecff5f3023a..6f758870fc19 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -30,16 +30,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; ((ti)->type->name == dm_snapshot_merge_target_name) /* - * The percentage increment we will wake up users at - */ -#define WAKE_UP_PERCENT 5 - -/* - * kcopyd priority of snapshot operations - */ -#define SNAPSHOT_COPY_PRIORITY 2 - -/* * The size of the mempool used to track chunks in use. */ #define MIN_IOS 256 @@ -180,6 +170,13 @@ struct dm_snap_pending_exception { * kcopyd. */ int started; + + /* + * For writing a complete chunk, bypassing the copy. + */ + struct bio *full_bio; + bio_end_io_t *full_bio_end_io; + void *full_bio_private; }; /* @@ -1055,8 +1052,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) { - ti->error = "Cannot allocate snapshot context private " - "structure"; + ti->error = "Cannot allocate private snapshot structure"; r = -ENOMEM; goto bad; } @@ -1380,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; struct bio *snapshot_bios = NULL; + struct bio *full_bio = NULL; int error = 0; if (!success) { @@ -1415,10 +1412,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) */ dm_insert_exception(&s->complete, e); - out: +out: dm_remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); + full_bio = pe->full_bio; + if (full_bio) { + full_bio->bi_end_io = pe->full_bio_end_io; + full_bio->bi_private = pe->full_bio_private; + } free_pending_exception(pe); increment_pending_exceptions_done_count(); @@ -1426,10 +1428,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) up_write(&s->lock); /* Submit any pending write bios */ - if (error) + if (error) { + if (full_bio) + bio_io_error(full_bio); error_bios(snapshot_bios); - else + } else { + if (full_bio) + bio_endio(full_bio, 0); flush_bios(snapshot_bios); + } retry_origin_bios(s, origin_bios); } @@ -1480,8 +1487,33 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - dm_kcopyd_copy(s->kcopyd_client, - &src, 1, &dest, 0, copy_callback, pe); + dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); +} + +static void full_bio_end_io(struct bio *bio, int error) +{ + void *callback_data = bio->bi_private; + + dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); +} + +static void start_full_bio(struct dm_snap_pending_exception *pe, + struct bio *bio) +{ + struct dm_snapshot *s = pe->snap; + void *callback_data; + + pe->full_bio = bio; + pe->full_bio_end_io = bio->bi_end_io; + pe->full_bio_private = bio->bi_private; + + callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, + copy_callback, pe); + + bio->bi_end_io = full_bio_end_io; + bio->bi_private = callback_data; + + generic_make_request(bio); } static struct dm_snap_pending_exception * @@ -1519,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s, bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); pe->started = 0; + pe->full_bio = NULL; if (s->store->type->prepare_exception(s->store, &pe->e)) { free_pending_exception(pe); @@ -1612,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } remap_exception(s, &pe->e, bio, chunk); - bio_list_add(&pe->snapshot_bios, bio); r = DM_MAPIO_SUBMITTED; + if (!pe->started && + bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { + pe->started = 1; + up_write(&s->lock); + start_full_bio(pe, bio); + goto out; + } + + bio_list_add(&pe->snapshot_bios, bio); + if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; @@ -1628,9 +1670,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, map_context->ptr = track_chunk(s, chunk); } - out_unlock: +out_unlock: up_write(&s->lock); - out: +out: return r; } @@ -1974,7 +2016,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, pe_to_start_now = pe; } - next_snapshot: +next_snapshot: up_write(&snap->lock); if (pe_to_start_now) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index bfe9c2333cea..986b8754bb08 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -54,7 +54,6 @@ struct dm_table { sector_t *highs; struct dm_target *targets; - unsigned discards_supported:1; unsigned integrity_supported:1; /* @@ -154,12 +153,11 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) return NULL; size = nmemb * elem_size; - addr = vmalloc(size); - if (addr) - memset(addr, 0, size); + addr = vzalloc(size); return addr; } +EXPORT_SYMBOL(dm_vcalloc); /* * highs, and targets are managed as dynamic arrays during a @@ -209,7 +207,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); atomic_set(&t->holders, 0); - t->discards_supported = 1; if (!num_targets) num_targets = KEYS_PER_NODE; @@ -281,6 +278,7 @@ void dm_table_get(struct dm_table *t) { atomic_inc(&t->holders); } +EXPORT_SYMBOL(dm_table_get); void dm_table_put(struct dm_table *t) { @@ -290,6 +288,7 @@ void dm_table_put(struct dm_table *t) smp_mb__before_atomic_dec(); atomic_dec(&t->holders); } +EXPORT_SYMBOL(dm_table_put); /* * Checks to see if we need to extend highs or targets. @@ -455,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * Add a device to the list, or just increment the usage count if * it's already present. */ -static int __table_get_device(struct dm_table *t, struct dm_target *ti, - const char *path, fmode_t mode, struct dm_dev **result) +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) { int r; dev_t uninitialized_var(dev); struct dm_dev_internal *dd; unsigned int major, minor; + struct dm_table *t = ti->table; BUG_ON(!t); @@ -509,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, *result = &dd->dm_dev; return 0; } +EXPORT_SYMBOL(dm_get_device); int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -539,23 +540,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, * If not we'll force DM to use PAGE_SIZE or * smaller I/O, just to be safe. */ - - if (q->merge_bvec_fn && !ti->type->merge) + if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) blk_limits_max_hw_sectors(limits, (unsigned int) (PAGE_SIZE >> 9)); return 0; } EXPORT_SYMBOL_GPL(dm_set_device_limits); -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, - struct dm_dev **result) -{ - return __table_get_device(ti->table, ti, path, mode, result); -} - - /* - * Decrement a devices use count and remove it if necessary. + * Decrement a device's use count and remove it if necessary. */ void dm_put_device(struct dm_target *ti, struct dm_dev *d) { @@ -568,6 +561,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) kfree(dd); } } +EXPORT_SYMBOL(dm_put_device); /* * Checks to see if the target joins onto the end of the table. @@ -791,8 +785,9 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests) - t->discards_supported = 0; + if (!tgt->num_discard_requests && tgt->discards_supported) + DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", + dm_device_name(t->md), type); return 0; @@ -802,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type, return r; } +/* + * Target argument parsing helpers. + */ +static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error, unsigned grouped) +{ + const char *arg_str = dm_shift_arg(arg_set); + + if (!arg_str || + (sscanf(arg_str, "%u", value) != 1) || + (*value < arg->min) || + (*value > arg->max) || + (grouped && arg_set->argc < *value)) { + *error = arg->error; + return -EINVAL; + } + + return 0; +} + +int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error) +{ + return validate_next_arg(arg, arg_set, value, error, 0); +} +EXPORT_SYMBOL(dm_read_arg); + +int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error) +{ + return validate_next_arg(arg, arg_set, value, error, 1); +} +EXPORT_SYMBOL(dm_read_arg_group); + +const char *dm_shift_arg(struct dm_arg_set *as) +{ + char *r; + + if (as->argc) { + as->argc--; + r = *as->argv; + as->argv++; + return r; + } + + return NULL; +} +EXPORT_SYMBOL(dm_shift_arg); + +void dm_consume_args(struct dm_arg_set *as, unsigned num_args) +{ + BUG_ON(as->argc < num_args); + as->argc -= num_args; + as->argv += num_args; +} +EXPORT_SYMBOL(dm_consume_args); + static int dm_table_set_type(struct dm_table *t) { unsigned i; @@ -1077,11 +1129,13 @@ void dm_table_event(struct dm_table *t) t->event_fn(t->event_context); mutex_unlock(&_event_lock); } +EXPORT_SYMBOL(dm_table_event); sector_t dm_table_get_size(struct dm_table *t) { return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; } +EXPORT_SYMBOL(dm_table_get_size); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) { @@ -1194,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t) blk_get_integrity(template_disk)); } +static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned flush = (*(unsigned *)data); + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && (q->flush_flags & flush); +} + +static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) +{ + struct dm_target *ti; + unsigned i = 0; + + /* + * Require at least one underlying device to support flushes. + * t->devices includes internal dm devices such as mirror logs + * so we need to use iterate_devices here, which targets + * supporting flushes must provide. + */ + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_flush_requests) + continue; + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_flush_capable, &flush)) + return 1; + } + + return 0; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { + unsigned flush = 0; + /* * Copy table's limits to the DM device's request_queue */ @@ -1207,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + if (dm_table_supports_flush(t, REQ_FLUSH)) { + flush |= REQ_FLUSH; + if (dm_table_supports_flush(t, REQ_FUA)) + flush |= REQ_FUA; + } + blk_queue_flush(q, flush); + dm_table_set_integrity(t); /* @@ -1237,6 +1334,7 @@ fmode_t dm_table_get_mode(struct dm_table *t) { return t->mode; } +EXPORT_SYMBOL(dm_table_get_mode); static void suspend_targets(struct dm_table *t, unsigned postsuspend) { @@ -1345,6 +1443,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) { return t->md; } +EXPORT_SYMBOL(dm_table_get_md); static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) @@ -1359,19 +1458,19 @@ bool dm_table_supports_discards(struct dm_table *t) struct dm_target *ti; unsigned i = 0; - if (!t->discards_supported) - return 0; - /* * Unless any target used by the table set discards_supported, * require at least one underlying device to support discards. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets - * supporting discard must provide. + * supporting discard selectively must provide. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); + if (!ti->num_discard_requests) + continue; + if (ti->discards_supported) return 1; @@ -1382,13 +1481,3 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; } - -EXPORT_SYMBOL(dm_vcalloc); -EXPORT_SYMBOL(dm_get_device); -EXPORT_SYMBOL(dm_put_device); -EXPORT_SYMBOL(dm_table_event); -EXPORT_SYMBOL(dm_table_get_size); -EXPORT_SYMBOL(dm_table_get_mode); -EXPORT_SYMBOL(dm_table_get_md); -EXPORT_SYMBOL(dm_table_put); -EXPORT_SYMBOL(dm_table_get); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0cf68b478878..52b39f335bb3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -37,6 +37,8 @@ static const char *_name = DM_NAME; static unsigned int major = 0; static unsigned int _major = 0; +static DEFINE_IDR(_minor_idr); + static DEFINE_SPINLOCK(_minor_lock); /* * For bio-based dm. @@ -109,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_MERGE_IS_OPTIONAL 6 /* * Work processed by per-device workqueue. @@ -313,6 +316,12 @@ static void __exit dm_exit(void) while (i--) _exits[i](); + + /* + * Should be empty by this point. + */ + idr_remove_all(&_minor_idr); + idr_destroy(&_minor_idr); } /* @@ -1171,7 +1180,8 @@ static int __clone_and_map_discard(struct clone_info *ci) /* * Even though the device advertised discard support, - * reconfiguration might have changed that since the + * that does not mean every target supports it, and + * reconfiguration might also have changed that since the * check was performed. */ if (!ti->num_discard_requests) @@ -1705,8 +1715,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits) /*----------------------------------------------------------------- * An IDR is used to keep track of allocated minor numbers. *---------------------------------------------------------------*/ -static DEFINE_IDR(_minor_idr); - static void free_minor(int minor) { spin_lock(&_minor_lock); @@ -1800,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); blk_queue_merge_bvec(md->queue, dm_merge_bvec); - blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); } /* @@ -1986,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size) } /* + * Return 1 if the queue has a compulsory merge_bvec_fn function. + * + * If this function returns 0, then the device is either a non-dm + * device without a merge_bvec_fn, or it is a dm device that is + * able to split any bios it receives that are too big. + */ +int dm_queue_merge_is_compulsory(struct request_queue *q) +{ + struct mapped_device *dev_md; + + if (!q->merge_bvec_fn) + return 0; + + if (q->make_request_fn == dm_request) { + dev_md = q->queuedata; + if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) + return 0; + } + + return 1; +} + +static int dm_device_merge_is_compulsory(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct block_device *bdev = dev->bdev; + struct request_queue *q = bdev_get_queue(bdev); + + return dm_queue_merge_is_compulsory(q); +} + +/* + * Return 1 if it is acceptable to ignore merge_bvec_fn based + * on the properties of the underlying devices. + */ +static int dm_table_merge_is_optional(struct dm_table *table) +{ + unsigned i = 0; + struct dm_target *ti; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) + return 0; + } + + return 1; +} + +/* * Returns old map, which caller must destroy. */ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, @@ -1995,6 +2055,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct request_queue *q = md->queue; sector_t size; unsigned long flags; + int merge_is_optional; size = dm_table_get_size(t); @@ -2020,10 +2081,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); + merge_is_optional = dm_table_merge_is_optional(t); + write_lock_irqsave(&md->map_lock, flags); old_map = md->map; md->map = t; dm_table_set_restrictions(t, q, limits); + if (merge_is_optional) + set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); + else + clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); write_unlock_irqrestore(&md->map_lock, flags); return old_map; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 1aaf16746da8..6745dbd278a4 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); +int dm_queue_merge_is_compulsory(struct request_queue *q); + void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); void dm_set_md_type(struct mapped_device *md, unsigned type); diff --git a/drivers/of/address.c b/drivers/of/address.c index da1f4b9605df..72c33fbe451d 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -610,6 +610,6 @@ void __iomem *of_iomap(struct device_node *np, int index) if (of_address_to_resource(np, index, &res)) return NULL; - return ioremap(res.start, 1 + res.end - res.start); + return ioremap(res.start, resource_size(&res)); } EXPORT_SYMBOL(of_iomap); diff --git a/drivers/of/base.c b/drivers/of/base.c index 02ed36719def..fb28b5af733b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,14 +17,39 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/ctype.h> #include <linux/module.h> #include <linux/of.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/proc_fs.h> +/** + * struct alias_prop - Alias property in 'aliases' node + * @link: List node to link the structure in aliases_lookup list + * @alias: Alias property name + * @np: Pointer to device_node that the alias stands for + * @id: Index value from end of alias name + * @stem: Alias string without the index + * + * The structure represents one alias property of 'aliases' node as + * an entry in aliases_lookup list. + */ +struct alias_prop { + struct list_head link; + const char *alias; + struct device_node *np; + int id; + char stem[0]; +}; + +static LIST_HEAD(aliases_lookup); + struct device_node *allnodes; struct device_node *of_chosen; +struct device_node *of_aliases; + +static DEFINE_MUTEX(of_aliases_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. @@ -610,8 +635,9 @@ EXPORT_SYMBOL(of_find_node_by_phandle); * * The out_value is modified only if a valid u32 value can be decoded. */ -int of_property_read_u32_array(const struct device_node *np, char *propname, - u32 *out_values, size_t sz) +int of_property_read_u32_array(const struct device_node *np, + const char *propname, u32 *out_values, + size_t sz) { struct property *prop = of_find_property(np, propname, NULL); const __be32 *val; @@ -645,7 +671,7 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_array); * * The out_string pointer is modified only if a valid string can be decoded. */ -int of_property_read_string(struct device_node *np, char *propname, +int of_property_read_string(struct device_node *np, const char *propname, const char **out_string) { struct property *prop = of_find_property(np, propname, NULL); @@ -987,3 +1013,108 @@ out_unlock: } #endif /* defined(CONFIG_OF_DYNAMIC) */ +static void of_alias_add(struct alias_prop *ap, struct device_node *np, + int id, const char *stem, int stem_len) +{ + ap->id = id; + ap->np = np; + strncpy(ap->stem, stem, stem_len); + ap->stem[stem_len] = 0; + list_add_tail(&ap->link, &aliases_lookup); + pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n", + ap->alias, ap->stem, ap->id, np ? np->full_name : NULL); +} + +/** + * of_alias_scan() - Scan all properties of 'aliases' node + * + * The function scans all the properties of 'aliases' node and populate + * the global lookup table with the properties. It returns the + * number of alias_prop found, or error code in error case. + */ +__init void of_alias_scan(void) +{ + struct property *pp; + + if (!of_aliases) + return; + + for_each_property(pp, of_aliases->properties) { + const char *start = pp->name; + const char *end = start + strlen(start); + struct device_node *np; + struct alias_prop *ap; + int id, len; + + /* Skip those we do not want to proceed */ + if (!strcmp(pp->name, "name") || + !strcmp(pp->name, "phandle") || + !strcmp(pp->name, "linux,phandle")) + continue; + + np = of_find_node_by_path(pp->value); + if (!np) + continue; + + /* walk alias backwards to extract the id and 'stem' string */ + while (isdigit(*(end-1)) && end > start) + end--; + len = end - start; + id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1; + + /* Allocate an alias_prop with enough space for the stem */ + ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4); + if (!ap) + continue; + ap->alias = start; + of_alias_add(ap, np, id, start, len); + } +} + +/** + * of_alias_get_id() - Get alias id for the given device_node + * @np: Pointer to the given device_node + * @stem: Alias stem of the given device_node + * + * The function travels the lookup table to get alias id for the given + * device_node and alias stem. It returns the alias id if find it. + * If not, dynamically creates one in the lookup table and returns it, + * or returns error code if fail to create. + */ +int of_alias_get_id(struct device_node *np, const char *stem) +{ + struct alias_prop *app; + int id = 0; + bool found = false; + + mutex_lock(&of_aliases_mutex); + list_for_each_entry(app, &aliases_lookup, link) { + if (strcmp(app->stem, stem) != 0) + continue; + + if (np == app->np) { + found = true; + id = app->id; + break; + } + + if (id <= app->id) + id = app->id + 1; + } + + /* If an id is not found, then allocate a new one */ + if (!found) { + app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4); + if (!app) { + id = -ENODEV; + goto out; + } + of_alias_add(app, np, id, stem, strlen(stem)); + } + + out: + mutex_unlock(&of_aliases_mutex); + + return id; +} +EXPORT_SYMBOL_GPL(of_alias_get_id); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 65200af29c52..13d6d3a96b31 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -707,10 +707,12 @@ void __init unflatten_device_tree(void) __unflatten_device_tree(initial_boot_params, &allnodes, early_init_dt_alloc_memory_arch); - /* Get pointer to OF "/chosen" node for use everywhere */ + /* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */ of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); + of_aliases = of_find_node_by_path("/aliases"); + of_alias_scan(); } #endif /* CONFIG_OF_EARLY_FLATTREE */ diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index a70fa89f76fd..220285760b68 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, } -static struct acpi_dock_ops acpiphp_dock_ops = { +static const struct acpi_dock_ops acpiphp_dock_ops = { .handler = handle_hotplug_event_func, }; diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index bcae8dd41496..7789002bdd5c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev) pr_info("%s: already running\n", pdev->name); /* force to 24 hour mode */ - new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); + new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); new_ctrl |= OMAP_RTC_CTRL_STOP; /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index eba88c749fb1..730b4a37b823 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -2267,17 +2267,13 @@ static int __devexit pl022_remove(struct amba_device *adev) { struct pl022 *pl022 = amba_get_drvdata(adev); - int status = 0; + if (!pl022) return 0; /* Remove the queue */ - status = destroy_queue(pl022); - if (status != 0) { - dev_err(&adev->dev, - "queue remove failed (%d)\n", status); - return status; - } + if (destroy_queue(pl022) != 0) + dev_err(&adev->dev, "queue remove failed\n"); load_ssp_default_config(pl022); pl022_dma_remove(pl022); free_irq(adev->irq[0], pl022); @@ -2289,7 +2285,6 @@ pl022_remove(struct amba_device *adev) spi_unregister_master(pl022->master); spi_master_put(pl022->master); amba_set_drvdata(adev, NULL); - dev_dbg(&adev->dev, "remove succeeded\n"); return 0; } diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 564ff4e0dbc4..8345fb457a40 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -1,5 +1,6 @@ config ISCSI_TARGET tristate "Linux-iSCSI.org iSCSI Target Mode Stack" + depends on NET select CRYPTO select CRYPTO_CRC32C select CRYPTO_CRC32C_INTEL if X86 diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 14c81c4265bd..c24fb10de60b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) struct iscsi_tiqn *tiqn = NULL; int ret; - if (strlen(buf) > ISCSI_IQN_LEN) { + if (strlen(buf) >= ISCSI_IQN_LEN) { pr_err("Target IQN exceeds %d bytes\n", ISCSI_IQN_LEN); return ERR_PTR(-EINVAL); @@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd( char *text_ptr, *text_in; int cmdsn_ret, niov = 0, rx_got, rx_size; u32 checksum = 0, data_crc = 0, payload_length; - u32 padding = 0, text_length = 0; + u32 padding = 0, pad_bytes = 0, text_length = 0; struct iscsi_cmd *cmd; struct kvec iov[3]; struct iscsi_text *hdr; @@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd( padding = ((-payload_length) & 3); if (padding != 0) { - iov[niov].iov_base = cmd->pad_bytes; + iov[niov].iov_base = &pad_bytes; iov[niov++].iov_len = padding; rx_size += padding; pr_debug("Receiving %u additional bytes" @@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd( if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, text_in, text_length, - padding, cmd->pad_bytes, + padding, (u8 *)&pad_bytes, (u8 *)&data_crc); if (checksum != data_crc) { @@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask( } #else -#define iscsit_thread_get_cpumask(X) ({}) + +void iscsit_thread_get_cpumask(struct iscsi_conn *conn) +{ + return; +} + #define iscsit_thread_check_cpumask(X, Y, Z) ({}) #endif /* CONFIG_SMP */ diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 32bb92c44450..f095e65b1ccf 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EOVERFLOW); } memset(buf, 0, MAX_PORTAL_LEN + 1); - snprintf(buf, MAX_PORTAL_LEN, "%s", name); + snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 713a4d23557a..4d087ac11067 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation( pr_err("Unable to allocate memory for struct iscsi_login.\n"); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - goto out; + return NULL; } login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index c75a01a1c475..89760329d5d0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb( } EXPORT_SYMBOL(transport_generic_handle_cdb); +static void transport_generic_request_failure(struct se_cmd *, + struct se_device *, int, int); /* * Used by fabric module frontends to queue tasks directly. * Many only be used from process context only @@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb); int transport_handle_cdb_direct( struct se_cmd *cmd) { + int ret; + if (!cmd->se_lun) { dump_stack(); pr_err("cmd->se_lun is NULL\n"); @@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct( " from interrupt context\n"); return -EINVAL; } - - return transport_generic_new_cmd(cmd); + /* + * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following + * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue() + * in existing usage to ensure that outstanding descriptors are handled + * correctly during shutdown via transport_generic_wait_for_tasks() + * + * Also, we don't take cmd->t_state_lock here as we only expect + * this to be called for initial descriptor submission. + */ + cmd->t_state = TRANSPORT_NEW_CMD; + atomic_set(&cmd->t_transport_active, 1); + /* + * transport_generic_new_cmd() is already handling QUEUE_FULL, + * so follow TRANSPORT_NEW_CMD processing thread context usage + * and call transport_generic_request_failure() if necessary.. + */ + ret = transport_generic_new_cmd(cmd); + if (ret == -EAGAIN) + return 0; + else if (ret < 0) { + cmd->transport_error_status = ret; + transport_generic_request_failure(cmd, NULL, 0, + (cmd->data_direction != DMA_TO_DEVICE)); + } + return 0; } EXPORT_SYMBOL(transport_handle_cdb_direct); @@ -3324,7 +3351,7 @@ static int transport_generic_cmd_sequencer( goto out_invalid_cdb_field; } - cmd->t_task_lba = get_unaligned_be16(&cdb[2]); + cmd->t_task_lba = get_unaligned_be64(&cdb[2]); passthrough = (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV); /* diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index f7fff7ed63c3..bd4fe21a23b8 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller); ssize_t ft_format_wwn(char *, size_t, u64); +/* + * Underlying HW specific helper function + */ +void ft_invl_hw_context(struct ft_cmd *); + #endif /* __TCM_FC_H__ */ diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 09df38b4610c..5654dc22f7ae 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg) default: pr_debug("%s: unhandled frame r_ctl %x\n", __func__, fh->fh_r_ctl); + ft_invl_hw_context(cmd); fc_frame_free(fp); transport_generic_free_cmd(&cmd->se_cmd, 0, 0); break; diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index 8e2a46ddcccb..c37f4cd96452 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -213,62 +213,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp) if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF)) goto drop; + f_ctl = ntoh24(fh->fh_f_ctl); + ep = fc_seq_exch(seq); + lport = ep->lp; + if (cmd->was_ddp_setup) { + BUG_ON(!ep); + BUG_ON(!lport); + } + /* - * Doesn't expect even single byte of payload. Payload + * Doesn't expect payload if DDP is setup. Payload * is expected to be copied directly to user buffers - * due to DDP (Large Rx offload) feature, hence - * BUG_ON if BUF is non-NULL + * due to DDP (Large Rx offload), */ buf = fc_frame_payload_get(fp, 1); - if (cmd->was_ddp_setup && buf) { - pr_debug("%s: When DDP was setup, not expected to" - "receive frame with payload, Payload shall be" - "copied directly to buffer instead of coming " - "via. legacy receive queues\n", __func__); - BUG_ON(buf); - } + if (buf) + pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, " + "cmd->sg_cnt 0x%x. DDP was setup" + " hence not expected to receive frame with " + "payload, Frame will be dropped if " + "'Sequence Initiative' bit in f_ctl is " + "not set\n", __func__, ep->xid, f_ctl, + cmd->sg, cmd->sg_cnt); + /* + * Invalidate HW DDP context if it was setup for respective + * command. Invalidation of HW DDP context is requited in both + * situation (success and error). + */ + ft_invl_hw_context(cmd); /* - * If ft_cmd indicated 'ddp_setup', in that case only the last frame - * should come with 'TSI bit being set'. If 'TSI bit is not set and if - * data frame appears here, means error condition. In both the cases - * release the DDP context (ddp_put) and in error case, as well - * initiate error recovery mechanism. + * If "Sequence Initiative (TSI)" bit set in f_ctl, means last + * write data frame is received successfully where payload is + * posted directly to user buffer and only the last frame's + * header is posted in receive queue. + * + * If "Sequence Initiative (TSI)" bit is not set, means error + * condition w.r.t. DDP, hence drop the packet and let explict + * ABORTS from other end of exchange timer trigger the recovery. */ - ep = fc_seq_exch(seq); - if (cmd->was_ddp_setup) { - BUG_ON(!ep); - lport = ep->lp; - BUG_ON(!lport); - } - if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) { - f_ctl = ntoh24(fh->fh_f_ctl); - /* - * If TSI bit set in f_ctl, means last write data frame is - * received successfully where payload is posted directly - * to user buffer and only the last frame's header is posted - * in legacy receive queue - */ - if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */ - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - goto last_frame; - } else { - /* - * Updating the write_data_len may be meaningless at - * this point, but just in case if required in future - * for debugging or any other purpose - */ - pr_err("%s: Received frame with TSI bit not" - " being SET, dropping the frame, " - "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n", - __func__, cmd->sg, cmd->sg_cnt); - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - lport->tt.seq_exch_abort(cmd->seq, 0); - goto drop; - } - } + if (f_ctl & FC_FC_SEQ_INIT) + goto last_frame; + else + goto drop; rel_off = ntohl(fh->fh_parm_offset); frame_len = fr_len(fp); @@ -331,3 +318,39 @@ last_frame: drop: fc_frame_free(fp); } + +/* + * Handle and cleanup any HW specific resources if + * received ABORTS, errors, timeouts. + */ +void ft_invl_hw_context(struct ft_cmd *cmd) +{ + struct fc_seq *seq = cmd->seq; + struct fc_exch *ep = NULL; + struct fc_lport *lport = NULL; + + BUG_ON(!cmd); + + /* Cleanup the DDP context in HW if DDP was setup */ + if (cmd->was_ddp_setup && seq) { + ep = fc_seq_exch(seq); + if (ep) { + lport = ep->lp; + if (lport && (ep->xid <= lport->lro_xid)) + /* + * "ddp_done" trigger invalidation of HW + * specific DDP context + */ + cmd->write_data_len = lport->tt.ddp_done(lport, + ep->xid); + + /* + * Resetting same variable to indicate HW's + * DDP context has been invalidated to avoid + * re_invalidation of same context (context is + * identified using ep->xid) + */ + cmd->was_ddp_setup = 0; + } + } +} diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index bf7c687519ef..f7f71b2d3101 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -14,11 +14,7 @@ menuconfig THERMAL If you want this support, you should say Y or M here. config THERMAL_HWMON - bool "Hardware monitoring support" + bool depends on THERMAL depends on HWMON=y || HWMON=THERMAL - help - The generic thermal sysfs driver's hardware monitoring support - requires a 2.10.7/3.0.2 or later lm-sensors userspace. - - Say Y if your user-space is new enough. + default y diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 0b1c82ad6805..708f8e92771a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* hwmon sys I/F */ #include <linux/hwmon.h> + +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; + +/* one temperature input for each thermal zone */ +struct thermal_hwmon_temp { + struct list_head hwmon_node; + struct thermal_zone_device *tz; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +}; + static LIST_HEAD(thermal_hwmon_list); static ssize_t @@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_input); + struct thermal_zone_device *tz = temp->tz; ret = tz->ops->get_temp(tz, &temperature); @@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); + struct thermal_zone_device *tz = temp->tz; long temperature; int ret; @@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, } -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +static struct thermal_hwmon_device * +thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; - int new_hwmon_device = 1; - int result; mutex_lock(&thermal_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) if (!strcmp(hwmon->type, tz->type)) { - new_hwmon_device = 0; mutex_unlock(&thermal_list_lock); - goto register_sys_interface; + return hwmon; + } + mutex_unlock(&thermal_list_lock); + + return NULL; +} + +/* Find the temperature input matching a given thermal zone */ +static struct thermal_hwmon_temp * +thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, + const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_temp *temp; + + mutex_lock(&thermal_list_lock); + list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) + if (temp->tz == tz) { + mutex_unlock(&thermal_list_lock); + return temp; } mutex_unlock(&thermal_list_lock); + return NULL; +} + +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + int new_hwmon_device = 1; + int result; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (hwmon) { + new_hwmon_device = 0; + goto register_sys_interface; + } + hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); if (!hwmon) return -ENOMEM; @@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) goto free_mem; register_sys_interface: - tz->hwmon = hwmon; + temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); + if (!temp) { + result = -ENOMEM; + goto unregister_name; + } + + temp->tz = tz; hwmon->count++; - snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH, "temp%d_input", hwmon->count); - tz->temp_input.attr.attr.name = tz->temp_input.name; - tz->temp_input.attr.attr.mode = 0444; - tz->temp_input.attr.show = temp_input_show; - sysfs_attr_init(&tz->temp_input.attr.attr); - result = device_create_file(hwmon->device, &tz->temp_input.attr); + temp->temp_input.attr.attr.name = temp->temp_input.name; + temp->temp_input.attr.attr.mode = 0444; + temp->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&temp->temp_input.attr.attr); + result = device_create_file(hwmon->device, &temp->temp_input.attr); if (result) - goto unregister_name; + goto free_temp_mem; if (tz->ops->get_crit_temp) { unsigned long temperature; if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH, "temp%d_crit", hwmon->count); - tz->temp_crit.attr.attr.name = tz->temp_crit.name; - tz->temp_crit.attr.attr.mode = 0444; - tz->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&tz->temp_crit.attr.attr); + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); result = device_create_file(hwmon->device, - &tz->temp_crit.attr); + &temp->temp_crit.attr); if (result) goto unregister_input; } @@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) mutex_lock(&thermal_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&tz->hwmon_node, &hwmon->tz_list); + list_add_tail(&temp->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_list_lock); return 0; unregister_input: - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); + free_temp_mem: + kfree(temp); unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); @@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) static void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { - struct thermal_hwmon_device *hwmon = tz->hwmon; + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (unlikely(!hwmon)) { + /* Should never happen... */ + dev_dbg(&tz->device, "hwmon device lookup failed!\n"); + return; + } + + temp = thermal_hwmon_lookup_temp(hwmon, tz); + if (unlikely(!temp)) { + /* Should never happen... */ + dev_dbg(&tz->device, "temperature input lookup failed!\n"); + return; + } - tz->hwmon = NULL; - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &tz->temp_crit.attr); + device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_list_lock); - list_del(&tz->hwmon_node); + list_del(&temp->hwmon_node); + kfree(temp); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_list_lock); return; diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 69407e72aac1..278aeaa92505 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633 enable its driver. config BACKLIGHT_AAT2870 - bool "AnalogicTech AAT2870 Backlight" + tristate "AnalogicTech AAT2870 Backlight" depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE help If you have a AnalogicTech AAT2870 say Y to enable the diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c index 4952a617563d..331f1ef1dad5 100644 --- a/drivers/video/backlight/aat2870_bl.c +++ b/drivers/video/backlight/aat2870_bl.c @@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl, struct backlight_device *bd = aat2870_bl->bd; int val; - val = brightness * aat2870_bl->max_current; + val = brightness * (aat2870_bl->max_current - 1); val /= bd->props.max_brightness; return val; @@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev) props.type = BACKLIGHT_RAW; bd = backlight_device_register("aat2870-backlight", &pdev->dev, aat2870_bl, &aat2870_bl_ops, &props); - if (!bd) { + if (IS_ERR(bd)) { dev_err(&pdev->dev, "Failed allocate memory for backlight device\n"); - ret = -ENOMEM; + ret = PTR_ERR(bd); goto out_kfree; } @@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev) else aat2870_bl->channels = AAT2870_BL_CH_ALL; - if (pdata->max_brightness > 0) + if (pdata->max_current > 0) aat2870_bl->max_current = pdata->max_current; else aat2870_bl->max_current = AAT2870_CURRENT_27_9; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index f441726ddf2b..86b0735e6aa0 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -36,9 +36,6 @@ config WATCHDOG_CORE and gives them the /dev/watchdog interface (and later also the sysfs interface). - To compile this driver as a module, choose M here: the module will - be called watchdog. - config WATCHDOG_NOWAYOUT bool "Disable watchdog shutdown on close" help diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index afa78a54711e..809f41c30c44 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -458,7 +458,15 @@ static int __devexit nv_tco_remove(struct platform_device *dev) static void nv_tco_shutdown(struct platform_device *dev) { + u32 val; + tco_timer_stop(); + + /* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not + * unset during shutdown. */ + pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val); + val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT; + pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val); } static struct platform_driver nv_tco_driver = { diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index db84f2322d1a..a267dc078daf 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -64,7 +64,7 @@ * misses its deadline, the kernel timer will allow the WDT to overflow. */ static int clock_division_ratio = WTCSR_CKS_4096; -#define next_ping_period(cks) msecs_to_jiffies(cks - 4) +#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4)) static const struct watchdog_info sh_wdt_info; static struct platform_device *sh_wdt_dev; diff --git a/fs/Kconfig b/fs/Kconfig index 19891aab9c6e..9fe0b349f4cd 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL select TMPFS_XATTR select GENERIC_ACL help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for tmpfs + filesystems. + + If you've selected TMPFS, it's possible that you'll also need + this option as there are a number of Linux distros that require + POSIX ACL support under /dev for certain features to work properly. + For example, some distros need this feature for ALSA-related /dev + files for sound to work properly. In short, if you're not sure, + say Y. To learn more about Access Control Lists, visit the POSIX ACLs for Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N. - config TMPFS_XATTR bool "Tmpfs extended attributes" depends on TMPFS diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9b72dcf1cd25..40e6ac08c21f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ + export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o + +btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 4cc5c0164ed6..eb159aaa5a11 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -28,8 +28,6 @@ #include "btrfs_inode.h" #include "xattr.h" -#ifdef CONFIG_BTRFS_FS_POSIX_ACL - struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; @@ -276,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = { .get = btrfs_xattr_acl_get, .set = btrfs_xattr_acl_set, }; - -#else /* CONFIG_BTRFS_FS_POSIX_ACL */ - -int btrfs_acl_chmod(struct inode *inode) -{ - return 0; -} - -int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) -{ - return 0; -} - -#endif /* CONFIG_BTRFS_FS_POSIX_ACL */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bfe42b03eaf9..8ec5d86f1734 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, u64 first_byte = disk_start; struct block_device *bdev; int ret; + int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); @@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); + if (!skip_sum) { + ret = btrfs_csum_one_bio(root, inode, bio, + start, 1); + BUG_ON(ret); + } ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); @@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); + if (!skip_sum) { + ret = btrfs_csum_one_bio(root, inode, bio, start, 1); + BUG_ON(ret); + } ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 365c4e1dde04..0469263e327e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2406,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); int btrfs_find_orphan_roots(struct btrfs_root *tree_root); -int btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node); +void btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node); void btrfs_check_and_init_root_item(struct btrfs_root_item *item); /* dir-item.c */ @@ -2523,6 +2523,14 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag #define PageChecked PageFsMisc #endif +/* This forces readahead on a given range of bytes in an inode */ +static inline void btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, unsigned long req_size) +{ + page_cache_sync_readahead(mapping, ra, file, offset, req_size); +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, @@ -2551,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); @@ -2648,12 +2653,21 @@ do { \ /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL struct posix_acl *btrfs_get_acl(struct inode *inode, int type); -#else -#define btrfs_get_acl NULL -#endif int btrfs_init_acl(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); +#else +#define btrfs_get_acl NULL +static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) +{ + return 0; +} +static inline int btrfs_acl_chmod(struct inode *inode) +{ + return 0; +} +#endif /* relocation.c */ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c360a848d97f..31d84e78129b 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -198,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); @@ -209,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) { - if (path->slots[0] == 0) - return NULL; - path->slots[0]--; - } - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != dir || - btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || - found_key.offset != key.offset) + if (ret > 0) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -315,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); @@ -324,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) { - if (path->slots[0] == 0) - return NULL; - path->slots[0]--; - } - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != dir || - btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || - found_key.offset != key.offset) + if (ret > 0) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d08ed79405d..66bac226944e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -663,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) struct btrfs_path *path; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + key.objectid = start; key.offset = len; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -3272,6 +3274,9 @@ again: } ret = btrfs_alloc_chunk(trans, extent_root, flags); + if (ret < 0 && ret != -ENOSPC) + goto out; + spin_lock(&space_info->lock); if (ret) space_info->full = 1; @@ -3281,6 +3286,7 @@ again: space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); +out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } @@ -4456,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, (unsigned long long)bytenr); - btrfs_print_leaf(extent_root, path->nodes[0]); + if (ret > 0) + btrfs_print_leaf(extent_root, + path->nodes[0]); } BUG_ON(ret); extent_slot = path->slots[0]; @@ -5073,7 +5081,9 @@ have_block_group: * group is does point to and try again */ if (!last_ptr_loop && last_ptr->block_group && - last_ptr->block_group != block_group) { + last_ptr->block_group != block_group && + index <= + get_block_group_index(last_ptr->block_group)) { btrfs_put_block_group(block_group); block_group = last_ptr->block_group; @@ -5501,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, @@ -6272,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; wc = kzalloc(sizeof(*wc), GFP_NOFS); - BUG_ON(!wc); + if (!wc) { + btrfs_free_path(path); + return -ENOMEM; + } trans = btrfs_start_transaction(tree_root, 0); BUG_ON(IS_ERR(trans)); @@ -6538,8 +6553,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) u64 min_allocable_bytes; int ret = -ENOSPC; - if (cache->ro) - return 0; /* * We need some metadata space and system metadata space for @@ -6555,6 +6568,12 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); + + if (cache->ro) { + ret = 0; + goto out; + } + num_bytes = cache->key.offset - cache->reserved - cache->pinned - cache->bytes_super - btrfs_block_group_used(&cache->item); @@ -6568,7 +6587,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) cache->ro = 1; ret = 0; } - +out: spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); return ret; @@ -7183,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cluster->refill_lock); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto out; + } inode = lookup_free_space_inode(root, block_group, path); if (!IS_ERR(inode)) { - btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); clear_nlink(inode); /* One for the block groups ref */ spin_lock(&block_group->lock); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 067b1747421b..d418164a35f1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, * * This should be called with the tree lock held. */ -static int merge_state(struct extent_io_tree *tree, - struct extent_state *state) +static void merge_state(struct extent_io_tree *tree, + struct extent_state *state) { struct extent_state *other; struct rb_node *other_node; if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) - return 0; + return; other_node = rb_prev(&state->rb_node); if (other_node) { @@ -287,19 +287,13 @@ static int merge_state(struct extent_io_tree *tree, free_extent_state(other); } } - - return 0; } -static int set_state_cb(struct extent_io_tree *tree, +static void set_state_cb(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - if (tree->ops && tree->ops->set_bit_hook) { - return tree->ops->set_bit_hook(tree->mapping->host, - state, bits); - } - - return 0; + if (tree->ops && tree->ops->set_bit_hook) + tree->ops->set_bit_hook(tree->mapping->host, state, bits); } static void clear_state_cb(struct extent_io_tree *tree, @@ -309,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree, tree->ops->clear_bit_hook(tree->mapping->host, state, bits); } +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, int *bits); + /* * insert an extent_state struct into the tree. 'bits' are set on the * struct before it is inserted. @@ -324,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree, int *bits) { struct rb_node *node; - int bits_to_set = *bits & ~EXTENT_CTLBITS; - int ret; if (end < start) { printk(KERN_ERR "btrfs end < start %llu %llu\n", @@ -335,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree, } state->start = start; state->end = end; - ret = set_state_cb(tree, state, bits); - if (ret) - return ret; - if (bits_to_set & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits_to_set; + set_state_bits(tree, state, bits); + node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -357,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree, return 0; } -static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, +static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, u64 split) { if (tree->ops && tree->ops->split_extent_hook) - return tree->ops->split_extent_hook(tree->mapping->host, - orig, split); - return 0; + tree->ops->split_extent_hook(tree->mapping->host, orig, split); } /* @@ -659,34 +648,25 @@ again: if (start > end) break; - if (need_resched()) { - spin_unlock(&tree->lock); - cond_resched(); - spin_lock(&tree->lock); - } + cond_resched_lock(&tree->lock); } out: spin_unlock(&tree->lock); return 0; } -static int set_state_bits(struct extent_io_tree *tree, +static void set_state_bits(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - int ret; int bits_to_set = *bits & ~EXTENT_CTLBITS; - ret = set_state_cb(tree, state, bits); - if (ret) - return ret; + set_state_cb(tree, state, bits); if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; tree->dirty_bytes += range; } state->state |= bits_to_set; - - return 0; } static void cache_state(struct extent_state *state, @@ -779,9 +759,7 @@ hit_next: goto out; } - err = set_state_bits(tree, state, &bits); - if (err) - goto out; + set_state_bits(tree, state, &bits); cache_state(state, cached_state); merge_state(tree, state); @@ -830,9 +808,7 @@ hit_next: if (err) goto out; if (state->end <= end) { - err = set_state_bits(tree, state, &bits); - if (err) - goto out; + set_state_bits(tree, state, &bits); cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) @@ -893,11 +869,7 @@ hit_next: err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - err = set_state_bits(tree, prealloc, &bits); - if (err) { - prealloc = NULL; - goto out; - } + set_state_bits(tree, prealloc, &bits); cache_state(prealloc, cached_state); merge_state(tree, prealloc); prealloc = NULL; @@ -1059,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) return 0; } -/* - * find the first offset in the io tree with 'bits' set. zero is - * returned if we find something, and *start_ret and *end_ret are - * set to reflect the state struct that was found. - * - * If nothing was found, 1 is returned, < 0 on error - */ -int find_first_extent_bit(struct extent_io_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - if (!node) - goto out; - - while (1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - spin_unlock(&tree->lock); - return ret; -} - /* find the first state struct with 'bits' set after 'start', and * return it. tree->lock must be held. NULL will returned if * nothing was found after 'start' @@ -1131,6 +1063,30 @@ out: } /* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found. + * + * If nothing was found, 1 is returned, < 0 on error + */ +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct extent_state *state; + int ret = 1; + + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, bits); + if (state) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + spin_unlock(&tree->lock); + return ret; +} + +/* * find a contiguous range of bytes in the file marked as delalloc, not * more than 'max_bytes'. start and end are used to return the range, * @@ -2546,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; - struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -2554,17 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; - struct writeback_control wbc_writepages = { - .sync_mode = wbc->sync_mode, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; ret = __extent_writepage(page, wbc, &epd); - extent_write_cache_pages(tree, mapping, &wbc_writepages, - __extent_writepage, &epd, flush_write_bio); flush_epd_write_bio(&epd); return ret; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 21a7ca9e7282..7b2f0c3e7929 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -76,15 +76,15 @@ struct extent_io_ops { struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); - int (*set_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - int (*merge_extent_hook)(struct inode *inode, - struct extent_state *new, - struct extent_state *other); - int (*split_extent_hook)(struct inode *inode, - struct extent_state *orig, u64 split); + void (*set_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + void (*merge_extent_hook)(struct inode *inode, + struct extent_state *new, + struct extent_state *other); + void (*split_extent_hook)(struct inode *inode, + struct extent_state *orig, u64 split); int (*write_cache_pages_lock_hook)(struct page *page); }; @@ -108,8 +108,6 @@ struct extent_state { wait_queue_head_t wq; atomic_t refs; unsigned long state; - u64 split_start; - u64 split_end; /* for use by the FS */ u64 private; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2d0410344ea3..7c97b3301459 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) { - int ret = 0; struct extent_map *merge = NULL; struct rb_node *rb; - struct extent_map *em; - - write_lock(&tree->lock); - em = lookup_extent_mapping(tree, start, len); - - WARN_ON(!em || em->start != start); - - if (!em) - goto out; - - clear_bit(EXTENT_FLAG_PINNED, &em->flags); if (em->start != 0) { rb = rb_prev(&em->rb_node); @@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) merge->in_tree = 0; free_extent_map(merge); } +} + +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +{ + int ret = 0; + struct extent_map *em; + + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, start, len); + + WARN_ON(!em || em->start != start); + + if (!em) + goto out; + + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + + try_merge_map(tree, em); free_extent_map(em); out: @@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *merge = NULL; struct rb_node *rb; struct extent_map *exist; @@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; } atomic_inc(&em->refs); - if (em->start != 0) { - rb = rb_prev(&em->rb_node); - if (rb) - merge = rb_entry(rb, struct extent_map, rb_node); - if (rb && mergable_maps(merge, em)) { - em->start = merge->start; - em->len += merge->len; - em->block_len += merge->block_len; - em->block_start = merge->block_start; - merge->in_tree = 0; - rb_erase(&merge->rb_node, &tree->map); - free_extent_map(merge); - } - } - rb = rb_next(&em->rb_node); - if (rb) - merge = rb_entry(rb, struct extent_map, rb_node); - if (rb && mergable_maps(em, merge)) { - em->len += merge->len; - em->block_len += merge->len; - rb_erase(&merge->rb_node, &tree->map); - merge->in_tree = 0; - free_extent_map(merge); - } + + try_merge_map(tree, em); out: return ret; } @@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len) return start + len; } -/** - * lookup_extent_mapping - lookup extent_map - * @tree: tree to lookup in - * @start: byte offset to start the search - * @len: length of the lookup range - * - * Find and return the first extent_map struct in @tree that intersects the - * [start, len] range. There may be additional objects in the tree that - * intersect, so check the object returned carefully to make sure that no - * additional lookups are needed. - */ -struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 len) +struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len, int strict) { struct extent_map *em; struct rb_node *rb_node; @@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 end = range_end(start, len); rb_node = __tree_search(&tree->map, start, &prev, &next); - if (!rb_node && prev) { - em = rb_entry(prev, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - } - if (!rb_node && next) { - em = rb_entry(next, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - } if (!rb_node) { - em = NULL; - goto out; - } - if (IS_ERR(rb_node)) { - em = ERR_CAST(rb_node); - goto out; + if (prev) + rb_node = prev; + else if (next) + rb_node = next; + else + return NULL; } + em = rb_entry(rb_node, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - em = NULL; - goto out; + if (strict && !(end > em->start && start < extent_map_end(em))) + return NULL; -found: atomic_inc(&em->refs); -out: return em; } /** + * lookup_extent_mapping - lookup extent_map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. There may be additional objects in the tree that + * intersect, so check the object returned carefully to make sure that no + * additional lookups are needed. + */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len) +{ + return __lookup_extent_mapping(tree, start, len, 1); +} + +/** * search_extent_mapping - find a nearby extent map * @tree: tree to lookup in * @start: byte offset to start the search @@ -365,38 +341,7 @@ out: struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len) { - struct extent_map *em; - struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; - - rb_node = __tree_search(&tree->map, start, &prev, &next); - if (!rb_node && prev) { - em = rb_entry(prev, struct extent_map, rb_node); - goto found; - } - if (!rb_node && next) { - em = rb_entry(next, struct extent_map, rb_node); - goto found; - } - if (!rb_node) { - em = NULL; - goto out; - } - if (IS_ERR(rb_node)) { - em = ERR_CAST(rb_node); - goto out; - } - em = rb_entry(rb_node, struct extent_map, rb_node); - goto found; - - em = NULL; - goto out; - -found: - atomic_inc(&em->refs); -out: - return em; + return __lookup_extent_mapping(tree, start, len, 0); } /** diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 08bcfa92a222..b910694f61ed 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -291,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; if (search_commit) { path->skip_locking = 1; @@ -677,7 +678,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, btrfs_super_csum_size(&root->fs_info->super_copy); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + sector_sum = sums->sums; again: next_offset = (u64)-1; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a35e51c9f235..658d66959abe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -74,7 +74,7 @@ struct inode_defrag { * If an existing record is found the defrag item you * pass in is freed */ -static int __btrfs_add_inode_defrag(struct inode *inode, +static void __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode, BTRFS_I(inode)->in_defrag = 1; rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); - return 0; + return; exists: kfree(defrag); - return 0; + return; } @@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, { struct btrfs_root *root = BTRFS_I(inode)->root; struct inode_defrag *defrag; - int ret = 0; u64 transid; if (!btrfs_test_opt(root, AUTO_DEFRAG)) @@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) - ret = __btrfs_add_inode_defrag(inode, defrag); + __btrfs_add_inode_defrag(inode, defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); - return ret; + return 0; } /* @@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; again: recow = 0; split = start; @@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, - unsigned long last_index, size_t write_bytes) + size_t write_bytes) { struct extent_state *cached_state = NULL; int i; @@ -1159,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; unsigned long first_index; - unsigned long last_index; size_t num_written = 0; int nrptrs; int ret = 0; @@ -1172,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, return -ENOMEM; first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; while (iov_iter_count(i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); @@ -1206,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * contents of pages from loop to loop */ ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, last_index, - write_bytes); + pos, first_index, write_bytes); if (ret) { btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ae762dab37f8..15fceefbca0a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1061,7 +1061,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; nolock = btrfs_is_free_space_inode(root, inode); @@ -1282,17 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } -static int btrfs_split_extent_hook(struct inode *inode, - struct extent_state *orig, u64 split) +static void btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) { /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) - return 0; + return; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); - return 0; } /* @@ -1301,18 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode, * extents, such as when we are doing sequential writes, so we can properly * account for the metadata space we'll need. */ -static int btrfs_merge_extent_hook(struct inode *inode, - struct extent_state *new, - struct extent_state *other) +static void btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) { /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) - return 0; + return; spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->lock); - return 0; } /* @@ -1320,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. */ -static int btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_set_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* @@ -1351,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1395,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* @@ -1645,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, int ret; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; @@ -2215,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (!root->orphan_block_rsv) { block_rsv = btrfs_alloc_block_rsv(root); - BUG_ON(!block_rsv); + if (!block_rsv) + return -ENOMEM; } spin_lock(&root->orphan_lock); @@ -2517,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + goto make_bad; + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -2998,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - BUG_ON(ret); + if (ret) + goto out; if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + if (ret) + goto out; } +out: nr = trans->blocks_used; __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -3147,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = -1; + if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); @@ -3159,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (min_type == 0 && root == BTRFS_I(inode)->root) btrfs_kill_delayed_inode_items(inode); - path = btrfs_alloc_path(); - BUG_ON(!path); - path->reada = -1; - key.objectid = ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -3690,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, int ret = 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, namelen, 0); @@ -3946,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *new) { struct inode *inode; + int bad_inode = 0; inode = btrfs_iget_locked(s, location->objectid, root); if (!inode) @@ -3955,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); - inode_tree_add(inode); - unlock_new_inode(inode); - if (new) - *new = 1; + if (!is_bad_inode(inode)) { + inode_tree_add(inode); + unlock_new_inode(inode); + if (new) + *new = 1; + } else { + bad_inode = 1; + } + } + + if (bad_inode) { + iput(inode); + inode = ERR_PTR(-ESTALE); } return inode; @@ -4451,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int owner; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return ERR_PTR(-ENOMEM); inode = new_inode(root->fs_info->sb); if (!inode) { @@ -6711,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, return 0; } -/* helper function for file defrag and space balancing. This - * forces readahead on a given range of bytes in an inode - */ -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index) -{ - pgoff_t req_size = last_index - offset + 1; - - page_cache_sync_readahead(mapping, ra, file, offset, req_size); - return offset + req_size; -} - struct inode *btrfs_alloc_inode(struct super_block *sb) { struct btrfs_inode *ei; @@ -7206,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + drop_inode = 1; + goto out_unlock; + } key.objectid = btrfs_ino(inode); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b980afc5edd..7cf013349941 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1749,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.objectid = key.offset; key.offset = (u64)-1; dirid = key.objectid; - } if (ptr < name) goto out; - memcpy(name, ptr, total_len); + memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; out: diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c deleted file mode 100644 index 82d569cb6267..000000000000 --- a/fs/btrfs/ref-cache.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/sort.h> -#include "ctree.h" -#include "ref-cache.h" -#include "transaction.h" - -static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, - struct rb_node *node) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct btrfs_leaf_ref *entry; - - while (*p) { - parent = *p; - entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); - - if (bytenr < entry->bytenr) - p = &(*p)->rb_left; - else if (bytenr > entry->bytenr) - p = &(*p)->rb_right; - else - return parent; - } - - entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); - rb_link_node(node, parent, p); - rb_insert_color(node, root); - return NULL; -} - -static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) -{ - struct rb_node *n = root->rb_node; - struct btrfs_leaf_ref *entry; - - while (n) { - entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); - WARN_ON(!entry->in_tree); - - if (bytenr < entry->bytenr) - n = n->rb_left; - else if (bytenr > entry->bytenr) - n = n->rb_right; - else - return n; - } - return NULL; -} diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h deleted file mode 100644 index 24f7001f6387..000000000000 --- a/fs/btrfs/ref-cache.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ -#ifndef __REFCACHE__ -#define __REFCACHE__ - -struct btrfs_extent_info { - /* bytenr and num_bytes find the extent in the extent allocation tree */ - u64 bytenr; - u64 num_bytes; - - /* objectid and offset find the back reference for the file */ - u64 objectid; - u64 offset; -}; - -struct btrfs_leaf_ref { - struct rb_node rb_node; - struct btrfs_leaf_ref_tree *tree; - int in_tree; - atomic_t usage; - - u64 root_gen; - u64 bytenr; - u64 owner; - u64 generation; - int nritems; - - struct list_head list; - struct btrfs_extent_info extents[]; -}; - -static inline size_t btrfs_leaf_ref_size(int nr_extents) -{ - return sizeof(struct btrfs_leaf_ref) + - sizeof(struct btrfs_extent_info) * nr_extents; -} -#endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ebe45443de06..f4099904565a 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -71,13 +71,12 @@ out: return ret; } -int btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node) +void btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node) { btrfs_set_root_bytenr(item, node->start); btrfs_set_root_level(item, btrfs_header_level(node)); btrfs_set_root_generation(item, btrfs_header_generation(node)); - return 0; } /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index eb55863bb4ae..7dc36fab4afc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root) spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { - DEFINE_WAIT(wait); atomic_inc(&cur_trans->use_count); spin_unlock(&root->fs_info->trans_lock); - while (1) { - prepare_to_wait(&root->fs_info->transaction_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (!cur_trans->blocked) - break; - schedule(); - } - finish_wait(&root->fs_info->transaction_wait, &wait); + + wait_event(root->fs_info->transaction_wait, + !cur_trans->blocked); put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); @@ -357,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root } /* wait for a transaction commit to be fully complete */ -static noinline int wait_for_commit(struct btrfs_root *root, +static noinline void wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { - DEFINE_WAIT(wait); - while (!commit->commit_done) { - prepare_to_wait(&commit->commit_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (commit->commit_done) - break; - schedule(); - } - finish_wait(&commit->commit_wait, &wait); - return 0; + wait_event(commit->commit_wait, commit->commit_done); } int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) @@ -1085,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) static void wait_current_trans_commit_start(struct btrfs_root *root, struct btrfs_transaction *trans) { - DEFINE_WAIT(wait); - - if (trans->in_commit) - return; - - while (1) { - prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (trans->in_commit) { - finish_wait(&root->fs_info->transaction_blocked_wait, - &wait); - break; - } - schedule(); - finish_wait(&root->fs_info->transaction_blocked_wait, &wait); - } + wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); } /* @@ -1110,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, struct btrfs_transaction *trans) { - DEFINE_WAIT(wait); - - if (trans->commit_done || (trans->in_commit && !trans->blocked)) - return; - - while (1) { - prepare_to_wait(&root->fs_info->transaction_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (trans->commit_done || - (trans->in_commit && !trans->blocked)) { - finish_wait(&root->fs_info->transaction_wait, - &wait); - break; - } - schedule(); - finish_wait(&root->fs_info->transaction_wait, - &wait); - } + wait_event(root->fs_info->transaction_wait, + trans->commit_done || (trans->in_commit && !trans->blocked)); } /* @@ -1234,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, atomic_inc(&cur_trans->use_count); btrfs_end_transaction(trans, root); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + wait_for_commit(root, cur_trans); put_transaction(cur_trans); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ac278dd83175..babee65f8eda 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { @@ -1723,7 +1724,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return -ENOMEM; if (*level == 1) { - wc->process_func(root, next, wc, ptr_gen); + ret = wc->process_func(root, next, wc, ptr_gen); + if (ret) + return ret; path->slots[*level]++; if (wc->free) { @@ -1788,8 +1791,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level + 1]; root_owner = btrfs_header_owner(parent); - wc->process_func(root, path->nodes[*level], wc, + ret = wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); + if (ret) + return ret; + if (wc->free) { struct extent_buffer *next; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b89e372c7544..53875ae73ad4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root, struct btrfs_key found_key; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; key.objectid = objectid; key.offset = (u64)-1; @@ -2061,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root) /* step two, relocate all the chunks */ path = btrfs_alloc_path(); - BUG_ON(!path); - + if (!path) { + ret = -ENOMEM; + goto error; + } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -2661,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, ret = find_next_chunk(fs_info->chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); - BUG_ON(ret); + if (ret) + return ret; alloc_profile = BTRFS_BLOCK_GROUP_METADATA | (fs_info->metadata_alloc_profile & diff --git a/fs/dcache.c b/fs/dcache.c index 2347cdb15abb..c83cae19161e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -795,6 +795,7 @@ relock: /** * prune_dcache_sb - shrink the dcache + * @sb: superblock * @nr_to_scan: number of entries to try to free * * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e2d88baf91d3..4687fea0c00f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) { void *ret; - ret = kmalloc(size, flags); + ret = kzalloc(size, flags); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; diff --git a/fs/stack.c b/fs/stack.c index 4a6f7f440658..b4f2ab48a61f 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) * * We don't actually know what locking is used at the lower level; * but if it's a filesystem that supports quotas, it will be using - * i_lock as in inode_add_bytes(). tmpfs uses other locking, and - * its 32-bit is (just) able to exceed 2TB i_size with the aid of - * holes; but its i_blocks cannot carry into the upper long without - * almost 2TB swap - let's ignore that case. + * i_lock as in inode_add_bytes(). */ if (sizeof(i_blocks) > sizeof(long)) spin_lock(&src->i_lock); diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 3090471b2a5e..e49c36d38d7e 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle); extern int register_dock_notifier(struct notifier_block *nb); extern void unregister_dock_notifier(struct notifier_block *nb); extern int register_hotplug_dock_device(acpi_handle handle, - struct acpi_dock_ops *ops, + const struct acpi_dock_ops *ops, void *context); extern void unregister_hotplug_dock_device(acpi_handle handle); #else diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2ed0a8486c19..f554a9313b43 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110413 +#define ACPI_CA_VERSION 0x20110623 #include "actypes.h" #include "actbl.h" @@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags; extern u32 acpi_gbl_enable_aml_debug_object; extern u8 acpi_gbl_copy_dsdt_locally; extern u8 acpi_gbl_truncate_io_addresses; +extern u8 acpi_gbl_disable_auto_repair; extern u32 acpi_current_gpe_count; extern struct acpi_table_fadt acpi_gbl_FADT; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ba4928cae473..67055f180330 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver; /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); -extern struct thermal_cooling_device_ops processor_cooling_ops; +extern const struct thermal_cooling_device_ops processor_cooling_ops; #ifdef CONFIG_CPU_FREQ void acpi_thermal_cpufreq_init(void); void acpi_thermal_cpufreq_exit(void); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e19527de6a93..6001b4da39dd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) -#define NID_INVAL (-1) int acpi_check_resource_conflict(const struct resource *res); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4427e0454051..3fa1f3d90ce0 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -208,6 +208,49 @@ struct dm_target_callbacks { int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); +/* + * Target argument parsing. + */ +struct dm_arg_set { + unsigned argc; + char **argv; +}; + +/* + * The minimum and maximum value of a numeric argument, together with + * the error message to use if the number is found to be outside that range. + */ +struct dm_arg { + unsigned min; + unsigned max; + char *error; +}; + +/* + * Validate the next argument, either returning it as *value or, if invalid, + * returning -EINVAL and setting *error. + */ +int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *value, char **error); + +/* + * Process the next argument as the start of a group containing between + * arg->min and arg->max further arguments. Either return the size as + * *num_args or, if invalid, return -EINVAL and set *error. + */ +int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, + unsigned *num_args, char **error); + +/* + * Return the current argument and shift to the next. + */ +const char *dm_shift_arg(struct dm_arg_set *as); + +/* + * Move through num_args arguments. + */ +void dm_consume_args(struct dm_arg_set *as, unsigned num_args); + /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. * Drop the reference with dm_put when you finish with the object. diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 3708455ee6c3..0cb8eff76bd6 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 20 +#define DM_VERSION_MINOR 21 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-02-02)" +#define DM_VERSION_EXTRA "-ioctl (2011-07-06)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 298d587e349b..5e54458e920f 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -42,5 +42,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned num_dests, struct dm_io_region *dests, unsigned flags, dm_kcopyd_notify_fn fn, void *context); +/* + * Prepare a callback and submit it via the kcopyd thread. + * + * dm_kcopyd_prepare_callback allocates a callback structure and returns it. + * It must not be called from interrupt context. + * The returned value should be passed into dm_kcopyd_do_callback. + * + * dm_kcopyd_do_callback submits the callback. + * It may be called from interrupt context. + * The callback is issued from the kcopyd thread. + */ +void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, + dm_kcopyd_notify_fn fn, void *context); +void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); + #endif /* __KERNEL__ */ #endif /* _LINUX_DM_KCOPYD_H */ diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3ff060ac7810..c6f996f2abb6 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -25,10 +25,6 @@ struct fault_attr { unsigned long reject_end; unsigned long count; - -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - struct dentry *dir; -#endif }; #define FAULT_ATTR_INITIALIZER { \ @@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -int init_fault_attr_dentries(struct fault_attr *attr, const char *name); -void cleanup_fault_attr_dentries(struct fault_attr *attr); +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr); #else /* CONFIG_FAULT_INJECTION_DEBUG_FS */ -static inline int init_fault_attr_dentries(struct fault_attr *attr, - const char *name) -{ - return -ENODEV; -} - -static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) +static inline struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { + return ERR_PTR(-ENODEV); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cb4089254f01..3a76faf6a3ee 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -92,7 +92,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 13a801f3d028..255491cf522e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask); +void ida_simple_remove(struct ida *ida, unsigned int id); + void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b96600786913..3b535db00a94 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); @@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline void mem_cgroup_add_lru_list(struct page *page, int lru) { } diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h index 89212df05622..f7316c29bdec 100644 --- a/include/linux/mfd/aat2870.h +++ b/include/linux/mfd/aat2870.h @@ -89,7 +89,7 @@ enum aat2870_id { /* Backlight current magnitude (mA) */ enum aat2870_current { - AAT2870_CURRENT_0_45, + AAT2870_CURRENT_0_45 = 1, AAT2870_CURRENT_0_90, AAT2870_CURRENT_1_80, AAT2870_CURRENT_2_70, diff --git a/include/linux/of.h b/include/linux/of.h index bd716f8908de..bc3dc6399547 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -68,6 +68,7 @@ struct device_node { /* Pointer for first entry in chain of all nodes. */ extern struct device_node *allnodes; extern struct device_node *of_chosen; +extern struct device_node *of_aliases; extern rwlock_t devtree_lock; static inline bool of_have_populated_dt(void) @@ -196,18 +197,22 @@ extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); extern int of_property_read_u32_array(const struct device_node *np, - char *propname, + const char *propname, u32 *out_values, size_t sz); -extern int of_property_read_string(struct device_node *np, char *propname, - const char **out_string); +extern int of_property_read_string(struct device_node *np, + const char *propname, + const char **out_string); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); +#define for_each_property(pp, properties) \ + for (pp = properties; pp != NULL; pp = pp->next) + extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -220,6 +225,10 @@ extern int of_parse_phandles_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, struct device_node **out_node, const void **out_args); +extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align); +extern void of_alias_scan(void); +extern int of_alias_get_id(struct device_node *np, const char *stem); + extern int of_machine_is_compatible(const char *compat); extern int prom_add_property(struct device_node* np, struct property* prop); @@ -242,13 +251,15 @@ static inline bool of_have_populated_dt(void) } static inline int of_property_read_u32_array(const struct device_node *np, - char *propname, u32 *out_values, size_t sz) + const char *propname, + u32 *out_values, size_t sz) { return -ENOSYS; } static inline int of_property_read_string(struct device_node *np, - char *propname, const char **out_string) + const char *propname, + const char **out_string) { return -ENOSYS; } @@ -256,7 +267,7 @@ static inline int of_property_read_string(struct device_node *np, #endif /* CONFIG_OF */ static inline int of_property_read_u32(const struct device_node *np, - char *propname, + const char *propname, u32 *out_value) { return of_property_read_u32_array(np, propname, out_value, 1); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index c84d900fbbb3..b74b74ffe0e7 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -97,7 +97,6 @@ extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_add_memory_arch(u64 base, u64 size); -extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); /* diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 23241c2fecce..9d4539c52e53 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -39,7 +39,15 @@ * when it is shrunk, before we rcu free the node. See shrink code for * details. */ -#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_INDIRECT_PTR 1 +/* + * A common use of the radix tree is to store pointers to struct pages; + * but shmem/tmpfs needs also to store swap entries in the same tree: + * those are marked as exceptional entries to distinguish them. + * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. + */ +#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 +#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 #define radix_tree_indirect_to_ptr(ptr) \ radix_tree_indirect_to_ptr((void __force *)(ptr)) @@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg) } /** + * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. + */ +static inline int radix_tree_exceptional_entry(void *arg) +{ + /* Not unlikely because radix_tree_exception often tested first */ + return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; +} + +/** + * radix_tree_exception - radix_tree_deref_slot returned either exception? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. + */ +static inline int radix_tree_exception(void *arg) +{ + return unlikely((unsigned long)arg & + (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); +} + +/** * radix_tree_replace_slot - replace item in a slot * @pslot: pointer to slot, returned by radix_tree_lookup_slot * @item: new item to store in the slot. @@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); @@ -222,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index aa08fa8fd79b..9291ac3cc627 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -8,22 +8,15 @@ /* inode in-kernel data */ -#define SHMEM_NR_DIRECT 16 - -#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) - struct shmem_inode_info { spinlock_t lock; unsigned long flags; unsigned long alloced; /* data pages alloced to file */ - unsigned long swapped; /* subtotal assigned to swap */ - unsigned long next_index; /* highest alloced index + 1 */ - struct shared_policy policy; /* NUMA memory alloc policy */ - struct page *i_indirect; /* top indirect blocks page */ union { - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ - char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; + unsigned long swapped; /* subtotal assigned to swap */ + char *symlink; /* unswappable short symlink */ }; + struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ struct list_head xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; @@ -49,7 +42,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) /* * Functions in mm/shmem.c called directly from elsewhere: */ -extern int init_tmpfs(void); +extern int shmem_init(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); @@ -59,8 +52,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cd42e30b7c6e..2189d3ffc85d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -1,3 +1,8 @@ +#ifndef _LINUX_SWAPOPS_H +#define _LINUX_SWAPOPS_H + +#include <linux/radix-tree.h> + /* * swapcache pages are stored in the swapper_space radix tree. We want to * get good packing density in that tree, so the index should be dense in @@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) return __swp_entry_to_pte(arch_entry); } +static inline swp_entry_t radix_to_swp_entry(void *arg) +{ + swp_entry_t entry; + + entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + return entry; +} + +static inline void *swp_to_radix_entry(swp_entry_t entry) +{ + unsigned long value; + + value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; + return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); +} + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { @@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry) return 0; } #endif + +#endif /* _LINUX_SWAPOPS_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d3ec89fb4122..47b4a27e6e97 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,22 +85,6 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) -#if defined(CONFIG_THERMAL_HWMON) -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; -#endif - struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -120,12 +104,6 @@ struct thermal_zone_device { struct mutex lock; /* protect cooling devices list */ struct list_head node; struct delayed_work poll_queue; -#if defined(CONFIG_THERMAL_HWMON) - struct list_head hwmon_node; - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -#endif }; /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" diff --git a/init/main.c b/init/main.c index d7211faed2ad..1952d37e4ecb 100644 --- a/init/main.c +++ b/init/main.c @@ -715,7 +715,7 @@ static void __init do_basic_setup(void) { cpuset_init_smp(); usermodehelper_init(); - init_tmpfs(); + shmem_init(); driver_init(); init_irq_proc(); do_ctors(); diff --git a/ipc/shm.c b/ipc/shm.c index 9fb044f3b345..b5bae9d945b6 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -304,9 +304,12 @@ void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; + if (shm_ids(ns).in_use == 0) + return; + /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d1db2880d1cf..e19ce1454ee1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; - s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - if (!s) - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; + s->pid = pid; - INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry_safe(s2, tmp, &listeners->list, list) { - if (s2->pid == pid) - goto next_cpu; + list_for_each_entry(s2, &listeners->list, list) { + if (s2->pid == pid && s2->valid) + goto exists; } list_add(&s->list, &listeners->list); s = NULL; -next_cpu: +exists: up_write(&listeners->sem); + kfree(s); /* nop if NULL */ } - kfree(s); return 0; } diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 2577b121c7c1..f193b7796449 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -void cleanup_fault_attr_dentries(struct fault_attr *attr) -{ - debugfs_remove_recursive(attr->dir); -} - -int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - dir = debugfs_create_dir(name, NULL); + dir = debugfs_create_dir(name, parent); if (!dir) - return -ENOMEM; - - attr->dir = dir; + return ERR_PTR(-ENOMEM); if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) goto fail; @@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return 0; + return dir; fail: - debugfs_remove_recursive(attr->dir); + debugfs_remove_recursive(dir); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/idr.c b/lib/idr.c index e15502e8b21e..db040ce3fa73 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -34,8 +34,10 @@ #include <linux/err.h> #include <linux/string.h> #include <linux/idr.h> +#include <linux/spinlock.h> static struct kmem_cache *idr_layer_cache; +static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida) EXPORT_SYMBOL(ida_destroy); /** + * ida_simple_get - get a new id. + * @ida: the (initialized) ida. + * @start: the minimum id (inclusive, < 0x8000000) + * @end: the maximum id (exclusive, < 0x8000000 or 0) + * @gfp_mask: memory allocation flags + * + * Allocates an id in the range start <= id < end, or returns -ENOSPC. + * On memory allocation failure, returns -ENOMEM. + * + * Use ida_simple_remove() to get rid of an id. + */ +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask) +{ + int ret, id; + unsigned int max; + + BUG_ON((int)start < 0); + BUG_ON((int)end < 0); + + if (end == 0) + max = 0x80000000; + else { + BUG_ON(end < start); + max = end - 1; + } + +again: + if (!ida_pre_get(ida, gfp_mask)) + return -ENOMEM; + + spin_lock(&simple_ida_lock); + ret = ida_get_new_above(ida, start, &id); + if (!ret) { + if (id > max) { + ida_remove(ida, id); + ret = -ENOSPC; + } else { + ret = id; + } + } + spin_unlock(&simple_ida_lock); + + if (unlikely(ret == -EAGAIN)) + goto again; + + return ret; +} +EXPORT_SYMBOL(ida_simple_get); + +/** + * ida_simple_remove - remove an allocated id. + * @ida: the (initialized) ida. + * @id: the id returned by ida_simple_get. + */ +void ida_simple_remove(struct ida *ida, unsigned int id) +{ + BUG_ON((int)id < 0); + spin_lock(&simple_ida_lock); + ida_remove(ida, id); + spin_unlock(&simple_ida_lock); +} +EXPORT_SYMBOL(ida_simple_remove); + +/** * ida_init - initialize ida handle * @ida: ida handle * diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7ea2e033d715..a2f9da59c197 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, + unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; if (slot->slots[i]) { - results[nr_found++] = &(slot->slots[i]); - if (nr_found == max_items) + results[nr_found] = &(slot->slots[i]); + if (indices) + indices[nr_found] = index; + if (++nr_found == max_items) { + index++; goto out; + } } + index++; } out: *next_index = index; @@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, NULL, + cur_index, max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the lookup are placed + * @indices: where their indices should be placed (but usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. */ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; + if (indices) + indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, + indices ? indices + ret : NULL, + cur_index, max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; @@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); +#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) +#include <linux/sched.h> /* for cond_resched() */ + +/* + * This linear search is at present only useful to shmem_unuse_inode(). + */ +static unsigned long __locate(struct radix_tree_node *slot, void *item, + unsigned long index, unsigned long *found_index) +{ + unsigned int shift, height; + unsigned long i; + + height = slot->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: check items */ + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] == item) { + *found_index = index + i; + index = 0; + goto out; + } + } + index += RADIX_TREE_MAP_SIZE; +out: + return index; +} + +/** + * radix_tree_locate_item - search through radix tree for item + * @root: radix tree root + * @item: item to be found + * + * Returns index where item was found, or -1 if not found. + * Caller must hold no lock (since this time-consuming function needs + * to be preemptible), and must check afterwards if item is still there. + */ +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = 0; + unsigned long found_index = -1; + + do { + rcu_read_lock(); + node = rcu_dereference_raw(root->rnode); + if (!radix_tree_is_indirect_ptr(node)) { + rcu_read_unlock(); + if (node == item) + found_index = 0; + break; + } + + node = indirect_to_ptr(node); + max_index = radix_tree_maxindex(node->height); + if (cur_index > max_index) + break; + + cur_index = __locate(node, item, cur_index, &found_index); + rcu_read_unlock(); + cond_resched(); + } while (cur_index != 0 && cur_index <= max_index); + + return found_index; +} +#else +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + return -1; +} +#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal diff --git a/mm/failslab.c b/mm/failslab.c index 1ce58c201dca..0dd7b8fec71c 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init failslab_debugfs_init(void) { + struct dentry *dir; mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; - int err; - err = init_fault_attr_dentries(&failslab.attr, "failslab"); - if (err) - return err; + dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); - if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, + if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &failslab.ignore_gfp_wait)) goto fail; - if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, + if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) goto fail; return 0; fail: - cleanup_fault_attr_dentries(&failslab.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/filemap.c b/mm/filemap.c index 867d40222ec7..645a080ba4df 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -33,7 +33,6 @@ #include <linux/cpuset.h> #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> -#include <linux/mm_inline.h> /* for page_is_file_cache() */ #include <linux/cleancache.h> #include "internal.h" @@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int error; VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageSwapBacked(page)); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); - if (PageSwapBacked(page)) - __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; @@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, { int ret; - /* - * Splice_read and readahead add shmem/tmpfs pages into the page cache - * before shmem_readpage has a chance to mark them as SwapBacked: they - * need to go on the anon lru below, and mem_cgroup_cache_charge - * (called in add_to_page_cache) needs to know where they're going too. - */ - if (mapping_cap_swap_backed(mapping)) - SetPageSwapBacked(page); - ret = add_to_page_cache(page, mapping, offset, gfp_mask); - if (ret == 0) { - if (page_is_file_cache(page)) - lru_cache_add_file(page); - else - lru_cache_add_anon(page); - } + if (ret == 0) + lru_cache_add_file(page); return ret; } EXPORT_SYMBOL_GPL(add_to_page_cache_lru); @@ -714,9 +699,16 @@ repeat: page = radix_tree_deref_slot(pagep); if (unlikely(!page)) goto out; - if (radix_tree_deref_retry(page)) - goto repeat; - + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto repeat; + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so return it without + * attempting to raise page count. + */ + goto out; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) repeat: page = find_get_page(mapping, offset); - if (page) { + if (page && !radix_tree_exception(page)) { lock_page(page); /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { @@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, start, nr_pages); + (void ***)pages, NULL, start, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -849,13 +841,22 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) { - WARN_ON(start | i); - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + WARN_ON(start | i); + goto restart; + } + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so skip over it - + * we only reach this from invalidate_mapping_pages(). + */ + continue; } if (!page_cache_get_speculative(page)) @@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, index, nr_pages); + (void ***)pages, NULL, index, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -912,12 +913,22 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so stop looking for + * contiguous pages. + */ + break; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -977,12 +988,21 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + /* + * This function is never used on a shmem/tmpfs + * mapping, so a swap entry won't be found here. + */ + BUG(); + } if (!page_cache_get_speculative(page)) goto repeat; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5f84d2351ddb..f4ec4e7ca4cd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,7 +35,6 @@ #include <linux/limits.h> #include <linux/mutex.h> #include <linux/rbtree.h> -#include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, return 0; if (PageCompound(page)) return 0; - /* - * Corner case handling. This is called from add_to_page_cache() - * in usual. But some FS (shmem) precharges this page before calling it - * and call add_to_page_cache() with GFP_NOWAIT. - * - * For GFP_NOWAIT case, the page may be pre-charged before calling - * add_to_page_cache(). (See shmem.c) check it here and avoid to call - * charge twice. (It works but has to pay a bit larger cost.) - * And when the page is SwapCache, it should take swap information - * into account. This is under lock_page() now. - */ - if (!(gfp_mask & __GFP_WAIT)) { - struct page_cgroup *pc; - - pc = lookup_page_cgroup(page); - if (!pc) - return 0; - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - unlock_page_cgroup(pc); - return 0; - } - unlock_page_cgroup(pc); - } if (unlikely(!mm)) mm = &init_mm; @@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, cgroup_release_and_wakeup_rmdir(&mem->css); } -/* - * A call to try to shrink memory usage on charge failure at shmem's swapin. - * Calling hierarchical_reclaim is not enough because we should update - * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. - * Moreover considering hierarchy, we should reclaim from the mem_over_limit, - * not from the memcg which this page would be charged to. - * try_charge_swapin does all of these works properly. - */ -int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - struct mem_cgroup *mem; - int ret; - - if (mem_cgroup_disabled()) - return 0; - - ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); - if (!ret) - mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - - return ret; -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - if (!mapping_cap_swap_backed(mapping)) { /* normal file */ - page = find_get_page(mapping, pgoff); - } else { /* shmem/tmpfs file. we should take account of swap too. */ - swp_entry_t ent; - mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); + page = find_get_page(mapping, pgoff); + +#ifdef CONFIG_SWAP + /* shmem/tmpfs may report page out on swap: account for that too. */ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) - entry->val = ent.val; + *entry = swap; + page = find_get_page(&swapper_space, swap.val); } - +#endif return page; } diff --git a/mm/mincore.c b/mm/mincore.c index a4e6b9d75c76..636a86876ff2 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * file will not get a swp_entry_t in its pte, but rather it is like * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. - * - * However when tmpfs moves the page from pagecache and into swapcache, - * it is still in core, but the find_get_page below won't find it. - * No big deal, but make a note of it. */ page = find_get_page(mapping, pgoff); +#ifdef CONFIG_SWAP + /* shmem/tmpfs may return swap: account for swapcache page too. */ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); + page = find_get_page(&swapper_space, swap.val); + } +#endif if (page) { present = PageUptodate(page); page_cache_release(page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1dbcf8888f14..6e8ecb6e021c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - int err; - err = init_fault_attr_dentries(&fail_page_alloc.attr, - "fail_page_alloc"); - if (err) - return err; - - dir = fail_page_alloc.attr.dir; + dir = fault_create_debugfs_attr("fail_page_alloc", NULL, + &fail_page_alloc.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &fail_page_alloc.ignore_gfp_wait)) @@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void) return 0; fail: - cleanup_fault_attr_dentries(&fail_page_alloc.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/shmem.c b/mm/shmem.c index 5cc21f8b4cd3..32f6763f16fb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -6,7 +6,8 @@ * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. - * Copyright (C) 2002-2005 Hugh Dickins. + * Copyright (C) 2002-2011 Hugh Dickins. + * Copyright (C) 2011 Google Inc. * Copyright (C) 2002-2005 VERITAS Software Corporation. * Copyright (C) 2004 Andi Kleen, SuSE Labs * @@ -28,7 +29,6 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/module.h> -#include <linux/percpu_counter.h> #include <linux/swap.h> static struct vfsmount *shm_mnt; @@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt; #include <linux/shmem_fs.h> #include <linux/writeback.h> #include <linux/blkdev.h> +#include <linux/pagevec.h> +#include <linux/percpu_counter.h> #include <linux/splice.h> #include <linux/security.h> #include <linux/swapops.h> @@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt; #include <linux/magic.h> #include <asm/uaccess.h> -#include <asm/div64.h> #include <asm/pgtable.h> -/* - * The maximum size of a shmem/tmpfs file is limited by the maximum size of - * its triple-indirect swap vector - see illustration at shmem_swp_entry(). - * - * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, - * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum - * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, - * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. - * - * We use / and * instead of shifts in the definitions below, so that the swap - * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. - */ -#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) - -#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) - -#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) -#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) - #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) -/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ -#define SHMEM_PAGEIN VM_READ -#define SHMEM_TRUNCATE VM_WRITE - -/* Definition to limit shmem_truncate's steps between cond_rescheds */ -#define LATENCY_LIMIT 64 - /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 +/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ +#define SHORT_SYMLINK_LEN 128 + struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; /* xattr name */ @@ -107,7 +83,7 @@ struct shmem_xattr { char value[0]; }; -/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ +/* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ @@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index, mapping_gfp_mask(inode->i_mapping), fault_type); } -static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) -{ - /* - * The above definition of ENTRIES_PER_PAGE, and the use of - * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. - * - * Mobility flags are masked out as swap vectors cannot move - */ - return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, - PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static inline void shmem_dir_free(struct page *page) -{ - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static struct page **shmem_dir_map(struct page *page) -{ - return (struct page **)kmap_atomic(page, KM_USER0); -} - -static inline void shmem_dir_unmap(struct page **dir) -{ - kunmap_atomic(dir, KM_USER0); -} - -static swp_entry_t *shmem_swp_map(struct page *page) -{ - return (swp_entry_t *)kmap_atomic(page, KM_USER1); -} - -static inline void shmem_swp_balance_unmap(void) -{ - /* - * When passing a pointer to an i_direct entry, to code which - * also handles indirect entries and so will shmem_swp_unmap, - * we must arrange for the preempt count to remain in balance. - * What kmap_atomic of a lowmem page does depends on config - * and architecture, so pretend to kmap_atomic some lowmem page. - */ - (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); -} - -static inline void shmem_swp_unmap(swp_entry_t *entry) -{ - kunmap_atomic(entry, KM_USER1); -} - static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { return sb->s_fs_info; @@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = { static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); -static void shmem_free_blocks(struct inode *inode, long pages) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) { - percpu_counter_add(&sbinfo->used_blocks, -pages); - inode->i_blocks -= pages*BLOCKS_PER_PAGE; - } -} - static int shmem_reserve_inode(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); @@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb) } /** - * shmem_recalc_inode - recalculate the size of an inode + * shmem_recalc_inode - recalculate the block usage of an inode * @inode: inode to recalc * * We have to calculate the free blocks since the mm can drop @@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced -= freed; + inode->i_blocks -= freed * BLOCKS_PER_PAGE; shmem_unacct_blocks(info->flags, freed); - shmem_free_blocks(inode, freed); } } -/** - * shmem_swp_entry - find the swap vector position in the info structure - * @info: info structure for the inode - * @index: index of the page to find - * @page: optional page to add to the structure. Has to be preset to - * all zeros - * - * If there is no space allocated yet it will return NULL when - * page is NULL, else it will use the page for the needed block, - * setting it to NULL on return to indicate that it has been used. - * - * The swap vector is organized the following way: - * - * There are SHMEM_NR_DIRECT entries directly stored in the - * shmem_inode_info structure. So small files do not need an addional - * allocation. - * - * For pages with index > SHMEM_NR_DIRECT there is the pointer - * i_indirect which points to a page which holds in the first half - * doubly indirect blocks, in the second half triple indirect blocks: - * - * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the - * following layout (for SHMEM_NR_DIRECT == 16): - * - * i_indirect -> dir --> 16-19 - * | +-> 20-23 - * | - * +-->dir2 --> 24-27 - * | +-> 28-31 - * | +-> 32-35 - * | +-> 36-39 - * | - * +-->dir3 --> 40-43 - * +-> 44-47 - * +-> 48-51 - * +-> 52-55 +/* + * Replace item expected in radix tree by a new item, while holding tree lock. */ -static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) -{ - unsigned long offset; - struct page **dir; - struct page *subdir; - - if (index < SHMEM_NR_DIRECT) { - shmem_swp_balance_unmap(); - return info->i_direct+index; - } - if (!info->i_indirect) { - if (page) { - info->i_indirect = *page; - *page = NULL; - } - return NULL; /* need another page */ - } - - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index /= ENTRIES_PER_PAGE; - dir = shmem_dir_map(info->i_indirect); - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; - dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; - index %= ENTRIES_PER_PAGE; - subdir = *dir; - if (!subdir) { - if (page) { - *dir = *page; - *page = NULL; - } - shmem_dir_unmap(dir); - return NULL; /* need another page */ - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } +static int shmem_radix_tree_replace(struct address_space *mapping, + pgoff_t index, void *expected, void *replacement) +{ + void **pslot; + void *item = NULL; + + VM_BUG_ON(!expected); + pslot = radix_tree_lookup_slot(&mapping->page_tree, index); + if (pslot) + item = radix_tree_deref_slot_protected(pslot, + &mapping->tree_lock); + if (item != expected) + return -ENOENT; + if (replacement) + radix_tree_replace_slot(pslot, replacement); + else + radix_tree_delete(&mapping->page_tree, index); + return 0; +} - dir += index; - subdir = *dir; - if (!subdir) { - if (!page || !(subdir = *page)) { - shmem_dir_unmap(dir); - return NULL; /* need a page */ +/* + * Like add_to_page_cache_locked, but error if expected item has gone. + */ +static int shmem_add_to_page_cache(struct page *page, + struct address_space *mapping, + pgoff_t index, gfp_t gfp, void *expected) +{ + int error = 0; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageSwapBacked(page)); + + if (!expected) + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); + if (!error) { + page_cache_get(page); + page->mapping = mapping; + page->index = index; + + spin_lock_irq(&mapping->tree_lock); + if (!expected) + error = radix_tree_insert(&mapping->page_tree, + index, page); + else + error = shmem_radix_tree_replace(mapping, index, + expected, page); + if (!error) { + mapping->nrpages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + __inc_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + } else { + page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); } - *dir = subdir; - *page = NULL; + if (!expected) + radix_tree_preload_end(); } - shmem_dir_unmap(dir); - return shmem_swp_map(subdir) + offset; + if (error) + mem_cgroup_uncharge_cache_page(page); + return error; } -static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) +/* + * Like delete_from_page_cache, but substitutes swap for page. + */ +static void shmem_delete_from_page_cache(struct page *page, void *radswap) { - long incdec = value? 1: -1; + struct address_space *mapping = page->mapping; + int error; - entry->val = value; - info->swapped += incdec; - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { - struct page *page = kmap_atomic_to_page(entry); - set_page_private(page, page_private(page) + incdec); - } + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, page->index, page, radswap); + page->mapping = NULL; + mapping->nrpages--; + __dec_zone_page_state(page, NR_FILE_PAGES); + __dec_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); + BUG_ON(error); } -/** - * shmem_swp_alloc - get the position of the swap entry for the page. - * @info: info structure for the inode - * @index: index of the page to find - * @sgp: check and recheck i_size? skip allocation? - * @gfp: gfp mask to use for any page allocation - * - * If the entry does not exist, allocate it. +/* + * Like find_get_pages, but collecting swap entries as well as pages. */ -static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, - unsigned long index, enum sgp_type sgp, gfp_t gfp) -{ - struct inode *inode = &info->vfs_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - struct page *page = NULL; - swp_entry_t *entry; - - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return ERR_PTR(-EINVAL); - - while (!(entry = shmem_swp_entry(info, index, &page))) { - if (sgp == SGP_READ) - return shmem_swp_map(ZERO_PAGE(0)); - /* - * Test used_blocks against 1 less max_blocks, since we have 1 data - * page (and perhaps indirect index pages) yet to allocate: - * a waste to allocate index if we cannot allocate data. - */ - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - 1) >= 0) - return ERR_PTR(-ENOSPC); - percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; +static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, + pgoff_t start, unsigned int nr_pages, + struct page **pages, pgoff_t *indices) +{ + unsigned int i; + unsigned int ret; + unsigned int nr_found; + + rcu_read_lock(); +restart: + nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, + (void ***)pages, indices, start, nr_pages); + ret = 0; + for (i = 0; i < nr_found; i++) { + struct page *page; +repeat: + page = radix_tree_deref_slot((void **)pages[i]); + if (unlikely(!page)) + continue; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto restart; + /* + * Otherwise, we must be storing a swap entry + * here as an exceptional entry: so return it + * without attempting to raise page count. + */ + goto export; } + if (!page_cache_get_speculative(page)) + goto repeat; - spin_unlock(&info->lock); - page = shmem_dir_alloc(gfp); - spin_lock(&info->lock); - - if (!page) { - shmem_free_blocks(inode, 1); - return ERR_PTR(-ENOMEM); - } - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - entry = ERR_PTR(-EINVAL); - break; + /* Has the page moved? */ + if (unlikely(page != *((void **)pages[i]))) { + page_cache_release(page); + goto repeat; } - if (info->next_index <= index) - info->next_index = index + 1; - } - if (page) { - /* another task gave its page, or truncated the file */ - shmem_free_blocks(inode, 1); - shmem_dir_free(page); - } - if (info->next_index <= index && !IS_ERR(entry)) - info->next_index = index + 1; - return entry; +export: + indices[ret] = indices[i]; + pages[ret] = page; + ret++; + } + if (unlikely(!ret && nr_found)) + goto restart; + rcu_read_unlock(); + return ret; } -/** - * shmem_free_swp - free some swap entries in a directory - * @dir: pointer to the directory - * @edir: pointer after last entry of the directory - * @punch_lock: pointer to spinlock when needed for the holepunch case +/* + * Remove swap entry from radix tree, free the swap and its page cache. */ -static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, - spinlock_t *punch_lock) -{ - spinlock_t *punch_unlock = NULL; - swp_entry_t *ptr; - int freed = 0; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val) { - if (unlikely(punch_lock)) { - punch_unlock = punch_lock; - punch_lock = NULL; - spin_lock(punch_unlock); - if (!ptr->val) - continue; - } - free_swap_and_cache(*ptr); - *ptr = (swp_entry_t){0}; - freed++; - } - } - if (punch_unlock) - spin_unlock(punch_unlock); - return freed; -} - -static int shmem_map_and_free_swp(struct page *subdir, int offset, - int limit, struct page ***dir, spinlock_t *punch_lock) -{ - swp_entry_t *ptr; - int freed = 0; - - ptr = shmem_swp_map(subdir); - for (; offset < limit; offset += LATENCY_LIMIT) { - int size = limit - offset; - if (size > LATENCY_LIMIT) - size = LATENCY_LIMIT; - freed += shmem_free_swp(ptr+offset, ptr+offset+size, - punch_lock); - if (need_resched()) { - shmem_swp_unmap(ptr); - if (*dir) { - shmem_dir_unmap(*dir); - *dir = NULL; - } - cond_resched(); - ptr = shmem_swp_map(subdir); - } - } - shmem_swp_unmap(ptr); - return freed; +static int shmem_free_swap(struct address_space *mapping, + pgoff_t index, void *radswap) +{ + int error; + + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, index, radswap, NULL); + spin_unlock_irq(&mapping->tree_lock); + if (!error) + free_swap_and_cache(radix_to_swp_entry(radswap)); + return error; } -static void shmem_free_pages(struct list_head *next) +/* + * Pagevec may contain swap entries, so shuffle up pages before releasing. + */ +static void shmem_pagevec_release(struct pagevec *pvec) { - struct page *page; - int freed = 0; - - do { - page = container_of(next, struct page, lru); - next = next->next; - shmem_dir_free(page); - freed++; - if (freed >= LATENCY_LIMIT) { - cond_resched(); - freed = 0; - } - } while (next); + int i, j; + + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (!radix_tree_exceptional_entry(page)) + pvec->pages[j++] = page; + } + pvec->nr = j; + pagevec_release(pvec); } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +/* + * Remove range of pages and swap entries from radix tree, and free them. + */ +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { + struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - unsigned long diroff; - struct page **dir; - struct page *topdir; - struct page *middir; - struct page *subdir; - swp_entry_t *ptr; - LIST_HEAD(pages_to_free); - long nr_pages_to_free = 0; + pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); + pgoff_t end = (lend >> PAGE_CACHE_SHIFT); + struct pagevec pvec; + pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; - int offset; - int freed; - int punch_hole; - spinlock_t *needs_lock; - spinlock_t *punch_lock; - unsigned long upper_limit; + pgoff_t index; + int i; - truncate_inode_pages_range(inode->i_mapping, start, end); + BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (idx >= info->next_index) - return; + pagevec_init(&pvec, 0); + index = start; + while (index <= end) { + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) + break; + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - spin_lock(&info->lock); - info->flags |= SHMEM_TRUNCATE; - if (likely(end == (loff_t) -1)) { - limit = info->next_index; - upper_limit = SHMEM_MAX_INDEX; - info->next_index = idx; - needs_lock = NULL; - punch_hole = 0; - } else { - if (end + 1 >= inode->i_size) { /* we may free a little more */ - limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - upper_limit = SHMEM_MAX_INDEX; - } else { - limit = (end + 1) >> PAGE_CACHE_SHIFT; - upper_limit = limit; - } - needs_lock = &info->lock; - punch_hole = 1; - } + index = indices[i]; + if (index > end) + break; + + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; + } - topdir = info->i_indirect; - if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { - info->i_indirect = NULL; - nr_pages_to_free++; - list_add(&topdir->lru, &pages_to_free); + if (!trylock_page(page)) + continue; + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); + } + unlock_page(page); + } + shmem_pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); + index++; } - spin_unlock(&info->lock); - if (info->swapped && idx < SHMEM_NR_DIRECT) { - ptr = info->i_direct; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); + if (partial) { + struct page *page = NULL; + shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); + if (page) { + zero_user_segment(page, partial, PAGE_CACHE_SIZE); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } } - /* - * If there are no indirect blocks or we are punching a hole - * below indirect blocks, nothing to be done. - */ - if (!topdir || limit <= SHMEM_NR_DIRECT) - goto done2; + index = start; + for ( ; ; ) { + cond_resched(); + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) { + if (index == start) + break; + index = start; + continue; + } + if (index == start && indices[0] > end) { + shmem_pagevec_release(&pvec); + break; + } + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - /* - * The truncation case has already dropped info->lock, and we're safe - * because i_size and next_index have already been lowered, preventing - * access beyond. But in the punch_hole case, we still need to take - * the lock when updating the swap directory, because there might be - * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or - * shmem_writepage. However, whenever we find we can remove a whole - * directory page (not at the misaligned start or end of the range), - * we first NULLify its pointer in the level above, and then have no - * need to take the lock when updating its contents: needs_lock and - * punch_lock (either pointing to info->lock or NULL) manage this. - */ + index = indices[i]; + if (index > end) + break; - upper_limit -= SHMEM_NR_DIRECT; - limit -= SHMEM_NR_DIRECT; - idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; - offset = idx % ENTRIES_PER_PAGE; - idx -= offset; - - dir = shmem_dir_map(topdir); - stage = ENTRIES_PER_PAGEPAGE/2; - if (idx < ENTRIES_PER_PAGEPAGE/2) { - middir = topdir; - diroff = idx/ENTRIES_PER_PAGE; - } else { - dir += ENTRIES_PER_PAGE/2; - dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; - while (stage <= idx) - stage += ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (*dir) { - diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % - ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; - if (!diroff && !offset && upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; } - shmem_dir_unmap(dir); - dir = shmem_dir_map(middir); - } else { - diroff = 0; - offset = 0; - idx = stage; - } - } - for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir); - dir = shmem_dir_map(topdir) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto done1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (punch_hole) - needs_lock = &info->lock; - if (upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); + lock_page(page); + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); } - shmem_dir_unmap(dir); - cond_resched(); - dir = shmem_dir_map(middir); - diroff = 0; - } - punch_lock = needs_lock; - subdir = dir[diroff]; - if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { - if (needs_lock) { - spin_lock(needs_lock); - dir[diroff] = NULL; - spin_unlock(needs_lock); - punch_lock = NULL; - } else - dir[diroff] = NULL; - nr_pages_to_free++; - list_add(&subdir->lru, &pages_to_free); - } - if (subdir && page_private(subdir) /* has swap entries */) { - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - freed = shmem_map_and_free_swp(subdir, - offset, size, &dir, punch_lock); - if (!dir) - dir = shmem_dir_map(middir); - nr_swaps_freed += freed; - if (offset || punch_lock) { - spin_lock(&info->lock); - set_page_private(subdir, - page_private(subdir) - freed); - spin_unlock(&info->lock); - } else - BUG_ON(page_private(subdir) != freed); + unlock_page(page); } - offset = 0; - } -done1: - shmem_dir_unmap(dir); -done2: - if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { - /* - * Call truncate_inode_pages again: racing shmem_unuse_inode - * may have swizzled a page in from swap since - * truncate_pagecache or generic_delete_inode did it, before we - * lowered next_index. Also, though shmem_getpage checks - * i_size before adding to cache, no recheck after: so fix the - * narrow window there too. - */ - truncate_inode_pages_range(inode->i_mapping, start, end); + shmem_pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; } spin_lock(&info->lock); - info->flags &= ~SHMEM_TRUNCATE; info->swapped -= nr_swaps_freed; - if (nr_pages_to_free) - shmem_free_blocks(inode, nr_pages_to_free); shmem_recalc_inode(inode); spin_unlock(&info->lock); - /* - * Empty swap vector directory pages to be freed? - */ - if (!list_empty(&pages_to_free)) { - pages_to_free.prev->next = NULL; - shmem_free_pages(pages_to_free.next); - } + inode->i_ctime = inode->i_mtime = CURRENT_TIME; } EXPORT_SYMBOL_GPL(shmem_truncate_range); @@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; - struct page *page = NULL; - if (newsize < oldsize) { - /* - * If truncating down to a partial page, then - * if that page is already allocated, hold it - * in memory until the truncation is over, so - * truncate_partial_page cannot miss it were - * it assigned to swap. - */ - if (newsize & (PAGE_CACHE_SIZE-1)) { - (void) shmem_getpage(inode, - newsize >> PAGE_CACHE_SHIFT, - &page, SGP_READ, NULL); - if (page) - unlock_page(page); - } - /* - * Reset SHMEM_PAGEIN flag so that shmem_truncate can - * detect if any pages might have been added to cache - * after truncate_inode_pages. But we needn't bother - * if it's being fully truncated to zero-length: the - * nrpages check is efficient enough in that case. - */ - if (newsize) { - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - info->flags &= ~SHMEM_PAGEIN; - spin_unlock(&info->lock); - } - } if (newsize != oldsize) { i_size_write(inode, newsize); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) /* unmap again to remove racily COWed private pages */ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); } - if (page) - page_cache_release(page); } setattr_copy(inode, attr); @@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } - } + } else + kfree(info->symlink); list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { kfree(xattr->name); @@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } -static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) -{ - swp_entry_t *ptr; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val == entry.val) - return ptr - dir; - } - return -1; -} - -static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +/* + * If swap found in inode, free it and move page from swapcache to filecache. + */ +static int shmem_unuse_inode(struct shmem_inode_info *info, + swp_entry_t swap, struct page *page) { - struct address_space *mapping; - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - struct page **dir; - struct page *subdir; - swp_entry_t *ptr; - int offset; + struct address_space *mapping = info->vfs_inode.i_mapping; + void *radswap; + pgoff_t index; int error; - idx = 0; - ptr = info->i_direct; - spin_lock(&info->lock); - if (!info->swapped) { - list_del_init(&info->swaplist); - goto lost2; - } - limit = info->next_index; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_swp_balance_unmap(); - goto found; - } - if (!info->i_indirect) - goto lost2; - - dir = shmem_dir_map(info->i_indirect); - stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; - - for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir-1); - if (cond_resched_lock(&info->lock)) { - /* check it has not been truncated */ - if (limit > info->next_index) { - limit = info->next_index; - if (idx >= limit) - goto lost2; - } - } - dir = shmem_dir_map(info->i_indirect) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto lost1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - subdir = *dir; - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - subdir = *dir; - if (subdir && page_private(subdir)) { - ptr = shmem_swp_map(subdir); - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - offset = shmem_find_swp(entry, ptr, ptr+size); - shmem_swp_unmap(ptr); - if (offset >= 0) { - shmem_dir_unmap(dir); - ptr = shmem_swp_map(subdir); - goto found; - } - } - } -lost1: - shmem_dir_unmap(dir-1); -lost2: - spin_unlock(&info->lock); - return 0; -found: - idx += offset; - ptr += offset; + radswap = swp_to_radix_entry(swap); + index = radix_tree_locate_item(&mapping->page_tree, radswap); + if (index == -1) + return 0; /* * Move _head_ to start search for next from here. * But be careful: shmem_evict_inode checks list_empty without taking * mutex, and there's an instant in list_move_tail when info->swaplist - * would appear empty, if it were the only one on shmem_swaplist. We - * could avoid doing it if inode NULL; or use this minor optimization. + * would appear empty, if it were the only one on shmem_swaplist. */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); @@ -968,29 +598,34 @@ found: * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - mapping = info->vfs_inode.i_mapping; - error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + error = shmem_add_to_page_cache(page, mapping, index, + GFP_NOWAIT, radswap); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { + /* + * Truncation and eviction use free_swap_and_cache(), which + * only does trylock page: if we raced, best clean up here. + */ delete_from_swap_cache(page); set_page_dirty(page); - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, ptr, 0); - swap_free(entry); + if (!error) { + spin_lock(&info->lock); + info->swapped--; + spin_unlock(&info->lock); + swap_free(swap); + } error = 1; /* not an error, but entry was found */ } - shmem_swp_unmap(ptr); - spin_unlock(&info->lock); return error; } /* - * shmem_unuse() search for an eventually swapped out shmem page. + * Search through swapped inodes to find and replace swap by page. */ -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { - struct list_head *p, *next; + struct list_head *this, *next; struct shmem_inode_info *info; int found = 0; int error; @@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page) * Charge page using GFP_KERNEL while we can wait, before taking * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; - /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu, - * it's okay if sometimes we get rescheduled after this. - */ - error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; - radix_tree_preload_end(); + /* No radix_tree_preload: swap entry keeps a place for page in tree */ mutex_lock(&shmem_swaplist_mutex); - list_for_each_safe(p, next, &shmem_swaplist) { - info = list_entry(p, struct shmem_inode_info, swaplist); - found = shmem_unuse_inode(info, entry, page); + list_for_each_safe(this, next, &shmem_swaplist) { + info = list_entry(this, struct shmem_inode_info, swaplist); + if (info->swapped) + found = shmem_unuse_inode(info, swap, page); + else + list_del_init(&info->swaplist); cond_resched(); if (found) break; } mutex_unlock(&shmem_swaplist_mutex); -uncharge: if (!found) mem_cgroup_uncharge_cache_page(page); if (found < 0) @@ -1041,10 +669,10 @@ out: static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct shmem_inode_info *info; - swp_entry_t *entry, swap; struct address_space *mapping; - unsigned long index; struct inode *inode; + swp_entry_t swap; + pgoff_t index; BUG_ON(!PageLocked(page)); mapping = page->mapping; @@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) /* * Add inode to shmem_unuse()'s list of swapped-out inodes, - * if it's not already there. Do it now because we cannot take - * mutex while holding spinlock, and must do so before the page - * is moved to swap cache, when its pagelock no longer protects + * if it's not already there. Do it now before the page is + * moved to swap cache, when its pagelock no longer protects * the inode from eviction. But don't unlock the mutex until - * we've taken the spinlock, because shmem_unuse_inode() will - * prune a !swapped inode from the swaplist under both locks. + * we've incremented swapped, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under this mutex. */ mutex_lock(&shmem_swaplist_mutex); if (list_empty(&info->swaplist)) list_add_tail(&info->swaplist, &shmem_swaplist); - spin_lock(&info->lock); - mutex_unlock(&shmem_swaplist_mutex); - - if (index >= info->next_index) { - BUG_ON(!(info->flags & SHMEM_TRUNCATE)); - goto unlock; - } - entry = shmem_swp_entry(info, index, NULL); - if (entry->val) { - WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ - free_swap_and_cache(*entry); - shmem_swp_set(info, entry, 0); - } - shmem_recalc_inode(inode); - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { - delete_from_page_cache(page); - shmem_swp_set(info, entry, swap.val); - shmem_swp_unmap(entry); swap_shmem_alloc(swap); + shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); + + spin_lock(&info->lock); + info->swapped++; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + mutex_unlock(&shmem_swaplist_mutex); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; } - shmem_swp_unmap(entry); -unlock: - spin_unlock(&info->lock); - /* - * add_to_swap_cache() doesn't return -EEXIST, so we can safely - * clear SWAP_HAS_CACHE flag. - */ + mutex_unlock(&shmem_swaplist_mutex); swapcache_free(swap, NULL); redirty: set_page_dirty(page); @@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) } #endif /* CONFIG_TMPFS */ -static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { struct mempolicy mpol, *spol; struct vm_area_struct pvma; - struct page *page; spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, idx)); + mpol_shared_policy_lookup(&info->policy, index)); /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; pvma.vm_policy = spol; - page = swapin_readahead(entry, gfp, &pvma, 0); - return page; + return swapin_readahead(swap, gfp, &pvma, 0); } static struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); /* * alloc_page_vma() will drop the shared policy reference @@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp, } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS -static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) +static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { } #endif /* CONFIG_TMPFS */ -static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { - return swapin_readahead(entry, gfp, NULL, 0); + return swapin_readahead(swap, gfp, NULL, 0); } static inline struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { return alloc_page(gfp); } @@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, +static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_inode_info *info; struct shmem_sb_info *sbinfo; struct page *page; - struct page *prealloc_page = NULL; - swp_entry_t *entry; swp_entry_t swap; int error; - int ret; + int once = 0; - if (idx >= SHMEM_MAX_INDEX) + if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; repeat: - page = find_lock_page(mapping, idx); - if (page) { + swap.val = 0; + page = find_lock_page(mapping, index); + if (radix_tree_exceptional_entry(page)) { + swap = radix_to_swp_entry(page); + page = NULL; + } + + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto failed; + } + + if (page || (sgp == SGP_READ && !swap.val)) { /* * Once we can get the page lock, it must be uptodate: * if there were an error in reading back from swap, * the page would not be inserted into the filecache. */ - BUG_ON(!PageUptodate(page)); - goto done; + BUG_ON(page && !PageUptodate(page)); + *pagep = page; + return 0; } /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu. + * Fast cache lookup did not find it: + * bring it back from swap or allocate. */ - error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); - if (error) - goto out; - radix_tree_preload_end(); - - if (sgp != SGP_READ && !prealloc_page) { - prealloc_page = shmem_alloc_page(gfp, info, idx); - if (prealloc_page) { - SetPageSwapBacked(prealloc_page); - if (mem_cgroup_cache_charge(prealloc_page, - current->mm, GFP_KERNEL)) { - page_cache_release(prealloc_page); - prealloc_page = NULL; - } - } - } - - spin_lock(&info->lock); - shmem_recalc_inode(inode); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) { - spin_unlock(&info->lock); - error = PTR_ERR(entry); - goto out; - } - swap = *entry; + info = SHMEM_I(inode); + sbinfo = SHMEM_SB(inode->i_sb); if (swap.val) { /* Look it up and read it in.. */ page = lookup_swap_cache(swap); if (!page) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; - page = shmem_swapin(swap, gfp, info, idx); + page = shmem_swapin(swap, gfp, info, index); if (!page) { - spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - if (entry->val == swap.val) - error = -ENOMEM; - shmem_swp_unmap(entry); - } - spin_unlock(&info->lock); - if (error) - goto out; - goto repeat; + error = -ENOMEM; + goto failed; } - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; } /* We have to do this with page locked to prevent races */ - if (!trylock_page(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; - } - if (PageWriteback(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_writeback(page); - unlock_page(page); - page_cache_release(page); - goto repeat; - } + lock_page(page); if (!PageUptodate(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - unlock_page(page); - page_cache_release(page); error = -EIO; - goto out; + goto failed; } - - error = add_to_page_cache_locked(page, mapping, - idx, GFP_NOWAIT); - if (error) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - if (error == -ENOMEM) { - /* - * reclaim from proper memory cgroup and - * call memcg's OOM if needed. - */ - error = mem_cgroup_shmem_charge_fallback( - page, current->mm, gfp); - if (error) { - unlock_page(page); - page_cache_release(page); - goto out; - } - } - unlock_page(page); - page_cache_release(page); - goto repeat; + wait_on_page_writeback(page); + + /* Someone may have already done it for us */ + if (page->mapping) { + if (page->mapping == mapping && + page->index == index) + goto done; + error = -EEXIST; + goto failed; } - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - delete_from_swap_cache(page); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, swp_to_radix_entry(swap)); + if (error) + goto failed; + + spin_lock(&info->lock); + info->swapped--; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + delete_from_swap_cache(page); set_page_dirty(page); swap_free(swap); - } else if (sgp == SGP_READ) { - shmem_swp_unmap(entry); - page = find_get_page(mapping, idx); - if (page && !trylock_page(page)) { - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; + } else { + if (shmem_acct_block(info->flags)) { + error = -ENOSPC; + goto failed; } - spin_unlock(&info->lock); - - } else if (prealloc_page) { - shmem_swp_unmap(entry); - sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) - goto nospace; + sbinfo->max_blocks) >= 0) { + error = -ENOSPC; + goto unacct; + } percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; - } else if (shmem_acct_block(info->flags)) - goto nospace; - - page = prealloc_page; - prealloc_page = NULL; - - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - swap = *entry; - shmem_swp_unmap(entry); } - ret = error || swap.val; - if (ret) - mem_cgroup_uncharge_cache_page(page); - else - ret = add_to_page_cache_lru(page, mapping, - idx, GFP_NOWAIT); - /* - * At add_to_page_cache_lru() failure, - * uncharge will be done automatically. - */ - if (ret) { - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - page_cache_release(page); - if (error) - goto out; - goto repeat; + + page = shmem_alloc_page(gfp, info, index); + if (!page) { + error = -ENOMEM; + goto decused; } - info->flags |= SHMEM_PAGEIN; + SetPageSwapBacked(page); + __set_page_locked(page); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, NULL); + if (error) + goto decused; + lru_cache_add_anon(page); + + spin_lock(&info->lock); info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); if (sgp == SGP_DIRTY) set_page_dirty(page); - - } else { - spin_unlock(&info->lock); - error = -ENOMEM; - goto out; } done: - *pagep = page; - error = 0; -out: - if (prealloc_page) { - mem_cgroup_uncharge_cache_page(prealloc_page); - page_cache_release(prealloc_page); + /* Perhaps the file has been truncated since we checked */ + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto trunc; } - return error; + *pagep = page; + return 0; -nospace: /* - * Perhaps the page was brought in from swap between find_lock_page - * and taking info->lock? We allow for that at add_to_page_cache_lru, - * but must also avoid reporting a spurious ENOSPC while working on a - * full tmpfs. + * Error recovery. */ - page = find_get_page(mapping, idx); +trunc: + ClearPageDirty(page); + delete_from_page_cache(page); + spin_lock(&info->lock); + info->alloced--; + inode->i_blocks -= BLOCKS_PER_PAGE; spin_unlock(&info->lock); +decused: + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); +unacct: + shmem_unacct_blocks(info->flags, 1); +failed: + if (swap.val && error != -EINVAL) { + struct page *test = find_get_page(mapping, index); + if (test && !radix_tree_exceptional_entry(test)) + page_cache_release(test); + /* Have another try if the entry has changed */ + if (test != swp_to_radix_entry(swap)) + error = -EEXIST; + } if (page) { + unlock_page(page); page_cache_release(page); + } + if (error == -ENOSPC && !once++) { + info = SHMEM_I(inode); + spin_lock(&info->lock); + shmem_recalc_inode(inode); + spin_unlock(&info->lock); goto repeat; } - error = -ENOSPC; - goto out; + if (error == -EEXIST) + goto repeat; + return error; } static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int error; int ret = VM_FAULT_LOCKED; - if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return VM_FAULT_SIGBUS; - error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } #ifdef CONFIG_NUMA -static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - unsigned long idx; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + pgoff_t index; - idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); + index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); } #endif @@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; -static const struct inode_operations shmem_symlink_inline_operations; +static const struct inode_operations shmem_short_symlink_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, @@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ { struct inode *inode = filp->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; - unsigned long index, offset; + pgoff_t index; + unsigned long offset; enum sgp_type sgp = SGP_READ; /* @@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ for (;;) { struct page *page = NULL; - unsigned long end_index, nr, ret; + pgoff_t end_index; + unsigned long nr, ret; loff_t i_size = i_size_read(inode); end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = - sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); + buf->f_bavail = + buf->f_bfree = sbinfo->max_blocks - + percpu_counter_sum(&sbinfo->used_blocks); } if (sbinfo->max_inodes) { buf->f_files = sbinfo->max_inodes; @@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= SHMEM_SYMLINK_INLINE_LEN) { - /* do it inline */ - memcpy(info->inline_symlink, symname, len); - inode->i_op = &shmem_symlink_inline_operations; + if (len <= SHORT_SYMLINK_LEN) { + info->symlink = kmemdup(symname, len, GFP_KERNEL); + if (!info->symlink) { + iput(inode); + return -ENOMEM; + } + inode->i_op = &shmem_short_symlink_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); if (error) { @@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return 0; } -static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); return NULL; } static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; - int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); - nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); + int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); + nd_set_link(nd, error ? ERR_PTR(error) : kmap(page)); if (page) unlock_page(page); return page; @@ -2202,7 +1760,6 @@ out: return err; } - static const struct xattr_handler *shmem_xattr_handlers[] = { #ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, @@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) } #endif /* CONFIG_TMPFS_XATTR */ -static const struct inode_operations shmem_symlink_inline_operations = { +static const struct inode_operations shmem_short_symlink_operations = { .readlink = generic_readlink, - .follow_link = shmem_follow_link_inline, + .follow_link = shmem_follow_short_symlink, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, .getxattr = shmem_getxattr, @@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) if (config.max_inodes < inodes) goto out; /* - * Those tests also disallow limited->unlimited while any are in - * use, so i_blocks will always be zero when max_blocks is zero; + * Those tests disallow limited->unlimited while any are in use; * but we must separately disallow unlimited->limited, because * in that case we have no record of how much is already in use. */ @@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; - sb->s_maxbytes = SHMEM_MAX_BYTES; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = TMPFS_MAGIC; @@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { - struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); - if (!p) + struct shmem_inode_info *info; + info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); + if (!info) return NULL; - return &p->vfs_inode; + return &info->vfs_inode; } -static void shmem_i_callback(struct rcu_head *head) +static void shmem_destroy_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); INIT_LIST_HEAD(&inode->i_dentry); @@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head) static void shmem_destroy_inode(struct inode *inode) { - if ((inode->i_mode & S_IFMT) == S_IFREG) { - /* only struct inode is valid if it's an inline symlink */ + if ((inode->i_mode & S_IFMT) == S_IFREG) mpol_free_shared_policy(&SHMEM_I(inode)->policy); - } - call_rcu(&inode->i_rcu, shmem_i_callback); + call_rcu(&inode->i_rcu, shmem_destroy_callback); } -static void init_once(void *foo) +static void shmem_init_inode(void *foo) { - struct shmem_inode_info *p = (struct shmem_inode_info *) foo; - - inode_init_once(&p->vfs_inode); + struct shmem_inode_info *info = foo; + inode_init_once(&info->vfs_inode); } -static int init_inodecache(void) +static int shmem_init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, SLAB_PANIC, init_once); + 0, SLAB_PANIC, shmem_init_inode); return 0; } -static void destroy_inodecache(void) +static void shmem_destroy_inodecache(void) { kmem_cache_destroy(shmem_inode_cachep); } @@ -2797,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = { #endif }; - static struct dentry *shmem_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, shmem_fill_super); } -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", .mount = shmem_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { int error; @@ -2819,18 +2371,18 @@ int __init init_tmpfs(void) if (error) goto out4; - error = init_inodecache(); + error = shmem_init_inodecache(); if (error) goto out3; - error = register_filesystem(&tmpfs_fs_type); + error = register_filesystem(&shmem_fs_type); if (error) { printk(KERN_ERR "Could not register tmpfs\n"); goto out2; } - shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, - tmpfs_fs_type.name, NULL); + shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, + shmem_fs_type.name, NULL); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); printk(KERN_ERR "Could not kern_mount tmpfs\n"); @@ -2839,9 +2391,9 @@ int __init init_tmpfs(void) return 0; out1: - unregister_filesystem(&tmpfs_fs_type); + unregister_filesystem(&shmem_fs_type); out2: - destroy_inodecache(); + shmem_destroy_inodecache(); out3: bdi_destroy(&shmem_backing_dev_info); out4: @@ -2849,45 +2401,6 @@ out4: return error; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file - * @inode: the inode to be searched - * @pgoff: the offset to be searched - * @pagep: the pointer for the found page to be stored - * @ent: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. Callers should handle - * these refcount. - */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent) -{ - swp_entry_t entry = { .val = 0 }, *ptr; - struct page *page = NULL; - struct shmem_inode_info *info = SHMEM_I(inode); - - if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - goto out; - - spin_lock(&info->lock); - ptr = shmem_swp_entry(info, pgoff, NULL); -#ifdef CONFIG_SWAP - if (ptr && ptr->val) { - entry.val = ptr->val; - page = find_get_page(&swapper_space, entry.val); - } else -#endif - page = find_get_page(inode->i_mapping, pgoff); - if (ptr) - shmem_swp_unmap(ptr); - spin_unlock(&info->lock); -out: - *pagep = page; - *ent = entry; -} -#endif - #else /* !CONFIG_SHMEM */ /* @@ -2901,23 +2414,23 @@ out: #include <linux/ramfs.h> -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .name = "tmpfs", .mount = ramfs_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { - BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); + BUG_ON(register_filesystem(&shmem_fs_type) != 0); - shm_mnt = kern_mount(&tmpfs_fs_type); + shm_mnt = kern_mount(&shmem_fs_type); BUG_ON(IS_ERR(shm_mnt)); return 0; } -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { return 0; } @@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) return 0; } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { - truncate_inode_pages_range(inode->i_mapping, start, end); + truncate_inode_pages_range(inode->i_mapping, lstart, lend); } EXPORT_SYMBOL_GPL(shmem_truncate_range); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file - * @inode: the inode to be searched - * @pgoff: the offset to be searched - * @pagep: the pointer for the found page to be stored - * @ent: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. Callers should handle - * these refcount. - */ -void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent) -{ - struct page *page = NULL; - - if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - goto out; - page = find_get_page(inode->i_mapping, pgoff); -out: - *pagep = page; - *ent = (swp_entry_t){ .val = 0 }; -} -#endif - #define shmem_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) -#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE #endif /* CONFIG_SHMEM */ @@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags if (IS_ERR(shm_mnt)) return (void *)shm_mnt; - if (size < 0 || size > SHMEM_MAX_BYTES) + if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); if (shmem_acct_size(flags, size)) diff --git a/mm/swapfile.c b/mm/swapfile.c index 1b8c33907242..17bc224bce68 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are two limiting factors: 1) the number of - * bits for the swap offset in the swp_entry_t type and - * 2) the number of bits in the a swap pte as defined by - * the different architectures. In order to find the - * largest possible bit mask a swap entry with swap type 0 + * device. There are three limiting factors: 1) the number + * of bits for the swap offset in the swp_entry_t type, and + * 2) the number of bits in the swap pte as defined by the + * the different architectures, and 3) the number of free bits + * in an exceptional radix_tree entry. In order to find the + * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, - * decoded to a swp_entry_t again and finally the swap + * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. + * swap pte. Then the same is done for a radix_tree entry. */ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; + swp_entry_to_pte(swp_entry(0, ~0UL)))); + maxpages = swp_offset(radix_to_swp_entry( + swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; + if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ diff --git a/mm/truncate.c b/mm/truncate.c index 232eb2736a79..b40ac6d4e86e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, unsigned long count = 0; int i; + /* + * Note: this function may get called on a shmem/tmpfs mapping: + * pagevec_lookup() might then return 0 prematurely (because it + * got a gangful of swap entries); but it's hardly worth worrying + * about - it can rarely have anything to free from such a mapping + * (most pages are dirty), and already skips over any difficulties. + */ + pagevec_init(&pvec, 0); while (index <= end && pagevec_lookup(&pvec, mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 5fb2e28e796f..91cdf9435fec 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -342,7 +342,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, kfree(bufs); return -EFAULT; } - bufs[ch] = compat_ptr(ptr); + bufs[i] = compat_ptr(ptr); bufptr++; } if (dir == SNDRV_PCM_STREAM_PLAYBACK) diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c index 0851cd13e303..e85e72baff9e 100644 --- a/sound/core/rtctimer.c +++ b/sound/core/rtctimer.c @@ -22,7 +22,7 @@ #include <linux/init.h> #include <linux/interrupt.h> -#include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/log2.h> #include <sound/core.h> #include <sound/timer.h> diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c index 3a7afa31c1d8..71d32c868c92 100644 --- a/sound/pci/asihpi/hpidspcd.c +++ b/sound/pci/asihpi/hpidspcd.c @@ -43,6 +43,7 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, struct pci_dev *dev = os_data; struct code_header header; char fw_name[20]; + short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND; int err; sprintf(fw_name, "asihpi/dsp%04x.bin", adapter); @@ -85,8 +86,10 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name); dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL); - if (!dsp_code->pvt) - return HPI_ERROR_MEMORY_ALLOC; + if (!dsp_code->pvt) { + err_ret = HPI_ERROR_MEMORY_ALLOC; + goto error2; + } dsp_code->pvt->dev = dev; dsp_code->pvt->firmware = firmware; @@ -99,7 +102,7 @@ error2: release_firmware(firmware); error1: dsp_code->block_length = 0; - return HPI_ERROR_DSP_FILE_NOT_FOUND; + return err_ret; } /*-------------------------------------------------------------------*/ diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 9683f84ecdc8..a32502e796de 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -177,16 +177,21 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else { u16 __user *ptr = NULL; u32 size = 0; - + u32 adapter_present; /* -1=no data 0=read from user mem, 1=write to user mem */ int wrflag = -1; - u32 adapter = hm->h.adapter_index; - struct hpi_adapter *pa = &adapters[adapter]; + struct hpi_adapter *pa; + + if (hm->h.adapter_index < HPI_MAX_ADAPTERS) { + pa = &adapters[hm->h.adapter_index]; + adapter_present = pa->type; + } else { + adapter_present = 0; + } - if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) { - hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER, - HPI_ADAPTER_OPEN, - HPI_ERROR_BAD_ADAPTER_NUMBER); + if (!adapter_present) { + hpi_init_response(&hr->r0, hm->h.object, + hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER); uncopied_bytes = copy_to_user(puhr, hr, sizeof(hr->h)); diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index af130ee0c45d..6edc67ced905 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -521,6 +521,7 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024) /* revisions >= 230 indicate AES32 card */ +#define HDSPM_MADI_ANCIENT_REV 204 #define HDSPM_MADI_OLD_REV 207 #define HDSPM_MADI_REV 210 #define HDSPM_RAYDAT_REV 211 @@ -1217,6 +1218,22 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) rate = 0; break; } + + /* QS and DS rates normally can not be detected + * automatically by the card. Only exception is MADI + * in 96k frame mode. + * + * So if we read SS values (32 .. 48k), check for + * user-provided DS/QS bits in the control register + * and multiply the base frequency accordingly. + */ + if (rate <= 48000) { + if (hdspm->control_register & HDSPM_QuadSpeed) + rate *= 4; + else if (hdspm->control_register & + HDSPM_DoubleSpeed) + rate *= 2; + } } break; } @@ -3415,6 +3432,91 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol, return change; } +#define HDSPM_MADI_SPEEDMODE(xname, xindex) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = xname, \ + .index = xindex, \ + .info = snd_hdspm_info_madi_speedmode, \ + .get = snd_hdspm_get_madi_speedmode, \ + .put = snd_hdspm_put_madi_speedmode \ +} + +static int hdspm_madi_speedmode(struct hdspm *hdspm) +{ + if (hdspm->control_register & HDSPM_QuadSpeed) + return 2; + if (hdspm->control_register & HDSPM_DoubleSpeed) + return 1; + return 0; +} + +static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode) +{ + hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed); + switch (mode) { + case 0: + break; + case 1: + hdspm->control_register |= HDSPM_DoubleSpeed; + break; + case 2: + hdspm->control_register |= HDSPM_QuadSpeed; + break; + } + hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register); + + return 0; +} + +static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + static char *texts[] = { "Single", "Double", "Quad" }; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 3; + + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = + uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, + texts[uinfo->value.enumerated.item]); + + return 0; +} + +static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + + spin_lock_irq(&hdspm->lock); + ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm); + spin_unlock_irq(&hdspm->lock); + return 0; +} + +static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + int change; + int val; + + if (!snd_hdspm_use_is_exclusive(hdspm)) + return -EBUSY; + val = ucontrol->value.integer.value[0]; + if (val < 0) + val = 0; + if (val > 2) + val = 2; + spin_lock_irq(&hdspm->lock); + change = val != hdspm_madi_speedmode(hdspm); + hdspm_set_madi_speedmode(hdspm, val); + spin_unlock_irq(&hdspm->lock); + return change; +} #define HDSPM_MIXER(xname, xindex) \ { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ @@ -4289,7 +4391,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = { HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_INPUT_SELECT("Input Select", 0) + HDSPM_INPUT_SELECT("Input Select", 0), + HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) }; @@ -4302,7 +4405,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = { HDSPM_SYNC_CHECK("MADI SyncCheck", 0), HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), - HDSPM_SAFE_MODE("Safe Mode", 0) + HDSPM_SAFE_MODE("Safe Mode", 0), + HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) }; static struct snd_kcontrol_new snd_hdspm_controls_aio[] = { @@ -6381,6 +6485,7 @@ static int __devinit snd_hdspm_create(struct snd_card *card, switch (hdspm->firmware_rev) { case HDSPM_MADI_REV: case HDSPM_MADI_OLD_REV: + case HDSPM_MADI_ANCIENT_REV: hdspm->io_type = MADI; hdspm->card_name = "RME MADI"; hdspm->midiPorts = 3; diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c index 34aa972669ed..3de99af8cb82 100644 --- a/sound/soc/txx9/txx9aclc.c +++ b/sound/soc/txx9/txx9aclc.c @@ -290,6 +290,7 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm) static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; struct snd_soc_dai *dai = rtd->cpu_dai; struct snd_pcm *pcm = rtd->pcm; struct platform_device *pdev = to_platform_device(dai->platform->dev); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6d8ef4a3a9b5..8b2d37b59c9e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset) void print_header(void) { if (show_pkg) - fprintf(stderr, "pkg "); + fprintf(stderr, "pk"); if (show_core) - fprintf(stderr, "core"); + fprintf(stderr, " cr"); if (show_cpu) fprintf(stderr, " CPU"); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + fprintf(stderr, " %%c0 "); if (has_aperf) - fprintf(stderr, " GHz"); + fprintf(stderr, " GHz"); fprintf(stderr, " TSC"); if (do_nhm_cstates) - fprintf(stderr, " %%c1 "); + fprintf(stderr, " %%c1"); if (do_nhm_cstates) - fprintf(stderr, " %%c3 "); + fprintf(stderr, " %%c3"); if (do_nhm_cstates) - fprintf(stderr, " %%c6 "); + fprintf(stderr, " %%c6"); if (do_snb_cstates) - fprintf(stderr, " %%c7 "); + fprintf(stderr, " %%c7"); if (do_snb_cstates) - fprintf(stderr, " %%pc2 "); + fprintf(stderr, " %%pc2"); if (do_nhm_cstates) - fprintf(stderr, " %%pc3 "); + fprintf(stderr, " %%pc3"); if (do_nhm_cstates) - fprintf(stderr, " %%pc6 "); + fprintf(stderr, " %%pc6"); if (do_snb_cstates) - fprintf(stderr, " %%pc7 "); + fprintf(stderr, " %%pc7"); if (extra_msr_offset) - fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + fprintf(stderr, " MSR 0x%x ", extra_msr_offset); putc('\n', stderr); } @@ -194,14 +194,14 @@ void print_cnt(struct counters *p) /* topology columns, print blanks on 1st (average) line */ if (p == cnt_average) { if (show_pkg) - fprintf(stderr, " "); + fprintf(stderr, " "); if (show_core) fprintf(stderr, " "); if (show_cpu) fprintf(stderr, " "); } else { if (show_pkg) - fprintf(stderr, "%4d", p->pkg); + fprintf(stderr, "%d", p->pkg); if (show_core) fprintf(stderr, "%4d", p->core); if (show_cpu) @@ -241,22 +241,22 @@ void print_cnt(struct counters *p) if (!skip_c1) fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); else - fprintf(stderr, " ****"); + fprintf(stderr, " ****"); } if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); if (do_snb_cstates) - fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc); + fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); if (extra_msr_offset) fprintf(stderr, " 0x%016llx", p->extra_msr); putc('\n', stderr); diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 2618ef2ba31f..33c5c7ee148f 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -137,7 +137,6 @@ void cmdline(int argc, char **argv) void validate_cpuid(void) { unsigned int eax, ebx, ecx, edx, max_level; - char brand[16]; unsigned int fms, family, model, stepping; eax = ebx = ecx = edx = 0; @@ -160,8 +159,8 @@ void validate_cpuid(void) model += ((fms >> 16) & 0xf) << 4; if (verbose > 1) - printf("CPUID %s %d levels family:model:stepping " - "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, + printf("CPUID %d levels family:model:stepping " + "0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); if (!(edx & (1 << 5))) { |