summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 22:15:14 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 22:15:14 +0200
commitdddec01eb8e2b56267b37a6f9f0997a64b4e0b2a (patch)
treeb6d8bfbce9abd105384b9d116499afbe306b9c22
parentStart using the new '%pS' infrastructure to print symbols (diff)
parentsplice: fix generic_file_splice_read() race with page invalidation (diff)
downloadlinux-dddec01eb8e2b56267b37a6f9f0997a64b4e0b2a.tar.xz
linux-dddec01eb8e2b56267b37a6f9f0997a64b4e0b2a.zip
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (37 commits) splice: fix generic_file_splice_read() race with page invalidation ramfs: enable splice write drivers/block/pktcdvd.c: avoid useless memset cdrom: revert commit 22a9189 (cdrom: use kmalloced buffers instead of buffers on stack) scsi: sr avoids useless buffer allocation block: blk_rq_map_kern uses the bounce buffers for stack buffers block: add blk_queue_update_dma_pad DAC960: push down BKL pktcdvd: push BKL down into driver paride: push ioctl down into driver block: use get_unaligned_* helpers block: extend queue_flag bitops block: request_module(): use format string Add bvec_merge_data to handle stacked devices and ->merge_bvec() block: integrity flags can't use bit ops on unsigned short cmdfilter: extend default read filter sg: fix odd style (extra parenthesis) introduced by cmd filter patch block: add bounce support to blk_rq_map_user_iov cfq-iosched: get rid of enable_idle being unused warning allow userspace to modify scsi command filter on per device basis ...
-rw-r--r--Documentation/ABI/testing/sysfs-block34
-rw-r--r--Documentation/block/data-integrity.txt327
-rw-r--r--block/Kconfig12
-rw-r--r--block/Makefile4
-rw-r--r--block/as-iosched.c18
-rw-r--r--block/blk-core.c19
-rw-r--r--block/blk-integrity.c381
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/blk-settings.c24
-rw-r--r--block/blk.h8
-rw-r--r--block/blktrace.c45
-rw-r--r--block/bsg.c38
-rw-r--r--block/cfq-iosched.c83
-rw-r--r--block/cmd-filter.c334
-rw-r--r--block/elevator.c8
-rw-r--r--block/genhd.c12
-rw-r--r--block/scsi_ioctl.c121
-rw-r--r--drivers/ata/libata-scsi.c3
-rw-r--r--drivers/block/DAC960.c157
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/paride/pt.c20
-rw-r--r--drivers/block/pktcdvd.c46
-rw-r--r--drivers/block/xen-blkfront.c48
-rw-r--r--drivers/cdrom/cdrom.c274
-rw-r--r--drivers/md/linear.c10
-rw-r--r--drivers/md/raid0.c10
-rw-r--r--drivers/md/raid10.c15
-rw-r--r--drivers/md/raid5.c10
-rw-r--r--drivers/net/xen-netfront.c4
-rw-r--r--drivers/scsi/sg.c44
-rw-r--r--drivers/scsi/sr.c20
-rw-r--r--drivers/xen/xenbus/xenbus_client.c2
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c10
-rw-r--r--fs/Makefile1
-rw-r--r--fs/bio-integrity.c719
-rw-r--r--fs/bio.c88
-rw-r--r--fs/ramfs/file-mmu.c1
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/splice.c17
-rw-r--r--include/linux/bio.h130
-rw-r--r--include/linux/blkdev.h163
-rw-r--r--include/linux/blktrace_api.h1
-rw-r--r--include/linux/genhd.h12
-rw-r--r--include/linux/iocontext.h18
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
47 files changed, 2741 insertions, 564 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 4bd9ea539129..44f52a4f5903 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -26,3 +26,37 @@ Description:
I/O statistics of partition <part>. The format is the
same as the above-written /sys/block/<disk>/stat
format.
+
+
+What: /sys/block/<disk>/integrity/format
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Metadata format for integrity capable block device.
+ E.g. T10-DIF-TYPE1-CRC.
+
+
+What: /sys/block/<disk>/integrity/read_verify
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether the block layer should verify the
+ integrity of read requests serviced by devices that
+ support sending integrity metadata.
+
+
+What: /sys/block/<disk>/integrity/tag_size
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Number of bytes of integrity tag space available per
+ 512 bytes of data.
+
+
+What: /sys/block/<disk>/integrity/write_generate
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether the block layer should automatically
+ generate checksums for write requests bound for
+ devices that support receiving integrity metadata.
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
new file mode 100644
index 000000000000..e9dc8d86adc7
--- /dev/null
+++ b/Documentation/block/data-integrity.txt
@@ -0,0 +1,327 @@
+----------------------------------------------------------------------
+1. INTRODUCTION
+
+Modern filesystems feature checksumming of data and metadata to
+protect against data corruption. However, the detection of the
+corruption is done at read time which could potentially be months
+after the data was written. At that point the original data that the
+application tried to write is most likely lost.
+
+The solution is to ensure that the disk is actually storing what the
+application meant it to. Recent additions to both the SCSI family
+protocols (SBC Data Integrity Field, SCC protection proposal) as well
+as SATA/T13 (External Path Protection) try to remedy this by adding
+support for appending integrity metadata to an I/O. The integrity
+metadata (or protection information in SCSI terminology) includes a
+checksum for each sector as well as an incrementing counter that
+ensures the individual sectors are written in the right order. And
+for some protection schemes also that the I/O is written to the right
+place on disk.
+
+Current storage controllers and devices implement various protective
+measures, for instance checksumming and scrubbing. But these
+technologies are working in their own isolated domains or at best
+between adjacent nodes in the I/O path. The interesting thing about
+DIF and the other integrity extensions is that the protection format
+is well defined and every node in the I/O path can verify the
+integrity of the I/O and reject it if corruption is detected. This
+allows not only corruption prevention but also isolation of the point
+of failure.
+
+----------------------------------------------------------------------
+2. THE DATA INTEGRITY EXTENSIONS
+
+As written, the protocol extensions only protect the path between
+controller and storage device. However, many controllers actually
+allow the operating system to interact with the integrity metadata
+(IMD). We have been working with several FC/SAS HBA vendors to enable
+the protection information to be transferred to and from their
+controllers.
+
+The SCSI Data Integrity Field works by appending 8 bytes of protection
+information to each sector. The data + integrity metadata is stored
+in 520 byte sectors on disk. Data + IMD are interleaved when
+transferred between the controller and target. The T13 proposal is
+similar.
+
+Because it is highly inconvenient for operating systems to deal with
+520 (and 4104) byte sectors, we approached several HBA vendors and
+encouraged them to allow separation of the data and integrity metadata
+scatter-gather lists.
+
+The controller will interleave the buffers on write and split them on
+read. This means that the Linux can DMA the data buffers to and from
+host memory without changes to the page cache.
+
+Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
+is somewhat heavy to compute in software. Benchmarks found that
+calculating this checksum had a significant impact on system
+performance for a number of workloads. Some controllers allow a
+lighter-weight checksum to be used when interfacing with the operating
+system. Emulex, for instance, supports the TCP/IP checksum instead.
+The IP checksum received from the OS is converted to the 16-bit CRC
+when writing and vice versa. This allows the integrity metadata to be
+generated by Linux or the application at very low cost (comparable to
+software RAID5).
+
+The IP checksum is weaker than the CRC in terms of detecting bit
+errors. However, the strength is really in the separation of the data
+buffers and the integrity metadata. These two distinct buffers much
+match up for an I/O to complete.
+
+The separation of the data and integrity metadata buffers as well as
+the choice in checksums is referred to as the Data Integrity
+Extensions. As these extensions are outside the scope of the protocol
+bodies (T10, T13), Oracle and its partners are trying to standardize
+them within the Storage Networking Industry Association.
+
+----------------------------------------------------------------------
+3. KERNEL CHANGES
+
+The data integrity framework in Linux enables protection information
+to be pinned to I/Os and sent to/received from controllers that
+support it.
+
+The advantage to the integrity extensions in SCSI and SATA is that
+they enable us to protect the entire path from application to storage
+device. However, at the same time this is also the biggest
+disadvantage. It means that the protection information must be in a
+format that can be understood by the disk.
+
+Generally Linux/POSIX applications are agnostic to the intricacies of
+the storage devices they are accessing. The virtual filesystem switch
+and the block layer make things like hardware sector size and
+transport protocols completely transparent to the application.
+
+However, this level of detail is required when preparing the
+protection information to send to a disk. Consequently, the very
+concept of an end-to-end protection scheme is a layering violation.
+It is completely unreasonable for an application to be aware whether
+it is accessing a SCSI or SATA disk.
+
+The data integrity support implemented in Linux attempts to hide this
+from the application. As far as the application (and to some extent
+the kernel) is concerned, the integrity metadata is opaque information
+that's attached to the I/O.
+
+The current implementation allows the block layer to automatically
+generate the protection information for any I/O. Eventually the
+intent is to move the integrity metadata calculation to userspace for
+user data. Metadata and other I/O that originates within the kernel
+will still use the automatic generation interface.
+
+Some storage devices allow each hardware sector to be tagged with a
+16-bit value. The owner of this tag space is the owner of the block
+device. I.e. the filesystem in most cases. The filesystem can use
+this extra space to tag sectors as they see fit. Because the tag
+space is limited, the block interface allows tagging bigger chunks by
+way of interleaving. This way, 8*16 bits of information can be
+attached to a typical 4KB filesystem block.
+
+This also means that applications such as fsck and mkfs will need
+access to manipulate the tags from user space. A passthrough
+interface for this is being worked on.
+
+
+----------------------------------------------------------------------
+4. BLOCK LAYER IMPLEMENTATION DETAILS
+
+4.1 BIO
+
+The data integrity patches add a new field to struct bio when
+CONFIG_BLK_DEV_INTEGRITY is enabled. bio->bi_integrity is a pointer
+to a struct bip which contains the bio integrity payload. Essentially
+a bip is a trimmed down struct bio which holds a bio_vec containing
+the integrity metadata and the required housekeeping information (bvec
+pool, vector count, etc.)
+
+A kernel subsystem can enable data integrity protection on a bio by
+calling bio_integrity_alloc(bio). This will allocate and attach the
+bip to the bio.
+
+Individual pages containing integrity metadata can subsequently be
+attached using bio_integrity_add_page().
+
+bio_free() will automatically free the bip.
+
+
+4.2 BLOCK DEVICE
+
+Because the format of the protection data is tied to the physical
+disk, each block device has been extended with a block integrity
+profile (struct blk_integrity). This optional profile is registered
+with the block layer using blk_integrity_register().
+
+The profile contains callback functions for generating and verifying
+the protection data, as well as getting and setting application tags.
+The profile also contains a few constants to aid in completing,
+merging and splitting the integrity metadata.
+
+Layered block devices will need to pick a profile that's appropriate
+for all subdevices. blk_integrity_compare() can help with that. DM
+and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
+will require extra work due to the application tag.
+
+
+----------------------------------------------------------------------
+5.0 BLOCK LAYER INTEGRITY API
+
+5.1 NORMAL FILESYSTEM
+
+ The normal filesystem is unaware that the underlying block device
+ is capable of sending/receiving integrity metadata. The IMD will
+ be automatically generated by the block layer at submit_bio() time
+ in case of a WRITE. A READ request will cause the I/O integrity
+ to be verified upon completion.
+
+ IMD generation and verification can be toggled using the
+
+ /sys/block/<bdev>/integrity/write_generate
+
+ and
+
+ /sys/block/<bdev>/integrity/read_verify
+
+ flags.
+
+
+5.2 INTEGRITY-AWARE FILESYSTEM
+
+ A filesystem that is integrity-aware can prepare I/Os with IMD
+ attached. It can also use the application tag space if this is
+ supported by the block device.
+
+
+ int bdev_integrity_enabled(block_device, int rw);
+
+ bdev_integrity_enabled() will return 1 if the block device
+ supports integrity metadata transfer for the data direction
+ specified in 'rw'.
+
+ bdev_integrity_enabled() honors the write_generate and
+ read_verify flags in sysfs and will respond accordingly.
+
+
+ int bio_integrity_prep(bio);
+
+ To generate IMD for WRITE and to set up buffers for READ, the
+ filesystem must call bio_integrity_prep(bio).
+
+ Prior to calling this function, the bio data direction and start
+ sector must be set, and the bio should have all data pages
+ added. It is up to the caller to ensure that the bio does not
+ change while I/O is in progress.
+
+ bio_integrity_prep() should only be called if
+ bio_integrity_enabled() returned 1.
+
+
+ int bio_integrity_tag_size(bio);
+
+ If the filesystem wants to use the application tag space it will
+ first have to find out how much storage space is available.
+ Because tag space is generally limited (usually 2 bytes per
+ sector regardless of sector size), the integrity framework
+ supports interleaving the information between the sectors in an
+ I/O.
+
+ Filesystems can call bio_integrity_tag_size(bio) to find out how
+ many bytes of storage are available for that particular bio.
+
+ Another option is bdev_get_tag_size(block_device) which will
+ return the number of available bytes per hardware sector.
+
+
+ int bio_integrity_set_tag(bio, void *tag_buf, len);
+
+ After a successful return from bio_integrity_prep(),
+ bio_integrity_set_tag() can be used to attach an opaque tag
+ buffer to a bio. Obviously this only makes sense if the I/O is
+ a WRITE.
+
+
+ int bio_integrity_get_tag(bio, void *tag_buf, len);
+
+ Similarly, at READ I/O completion time the filesystem can
+ retrieve the tag buffer using bio_integrity_get_tag().
+
+
+6.3 PASSING EXISTING INTEGRITY METADATA
+
+ Filesystems that either generate their own integrity metadata or
+ are capable of transferring IMD from user space can use the
+ following calls:
+
+
+ struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
+
+ Allocates the bio integrity payload and hangs it off of the bio.
+ nr_pages indicate how many pages of protection data need to be
+ stored in the integrity bio_vec list (similar to bio_alloc()).
+
+ The integrity payload will be freed at bio_free() time.
+
+
+ int bio_integrity_add_page(bio, page, len, offset);
+
+ Attaches a page containing integrity metadata to an existing
+ bio. The bio must have an existing bip,
+ i.e. bio_integrity_alloc() must have been called. For a WRITE,
+ the integrity metadata in the pages must be in a format
+ understood by the target device with the notable exception that
+ the sector numbers will be remapped as the request traverses the
+ I/O stack. This implies that the pages added using this call
+ will be modified during I/O! The first reference tag in the
+ integrity metadata must have a value of bip->bip_sector.
+
+ Pages can be added using bio_integrity_add_page() as long as
+ there is room in the bip bio_vec array (nr_pages).
+
+ Upon completion of a READ operation, the attached pages will
+ contain the integrity metadata received from the storage device.
+ It is up to the receiver to process them and verify data
+ integrity upon completion.
+
+
+6.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
+ METADATA
+
+ To enable integrity exchange on a block device the gendisk must be
+ registered as capable:
+
+ int blk_integrity_register(gendisk, blk_integrity);
+
+ The blk_integrity struct is a template and should contain the
+ following:
+
+ static struct blk_integrity my_profile = {
+ .name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
+ .generate_fn = my_generate_fn,
+ .verify_fn = my_verify_fn,
+ .get_tag_fn = my_get_tag_fn,
+ .set_tag_fn = my_set_tag_fn,
+ .tuple_size = sizeof(struct my_tuple_size),
+ .tag_size = <tag bytes per hw sector>,
+ };
+
+ 'name' is a text string which will be visible in sysfs. This is
+ part of the userland API so chose it carefully and never change
+ it. The format is standards body-type-variant.
+ E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
+
+ 'generate_fn' generates appropriate integrity metadata (for WRITE).
+
+ 'verify_fn' verifies that the data buffer matches the integrity
+ metadata.
+
+ 'tuple_size' must be set to match the size of the integrity
+ metadata per sector. I.e. 8 for DIF and EPP.
+
+ 'tag_size' must be set to identify how many bytes of tag space
+ are available per hardware sector. For DIF this is either 2 or
+ 0 depending on the value of the Control Mode Page ATO bit.
+
+ See 6.2 for a description of get_tag_fn and set_tag_fn.
+
+----------------------------------------------------------------------
+2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/block/Kconfig b/block/Kconfig
index 3e97f2bc446f..1ab7c15c8d7a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
If unsure, say N.
+config BLK_DEV_INTEGRITY
+ bool "Block layer data integrity support"
+ ---help---
+ Some storage devices allow extra information to be
+ stored/retrieved to help protect the data. The block layer
+ data integrity option provides hooks which can be used by
+ filesystems to ensure better data integrity.
+
+ Say yes here if you have a storage device that provides the
+ T10/SCSI Data Integrity Field or the T13/ATA External Path
+ Protection. If in doubt, say N.
+
endif # BLOCK
config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d79594..208000b0750d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
@@ -14,3 +15,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 743f33a01a07..9735acb5b4f5 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,6 +151,7 @@ enum arq_state {
static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
static void as_antic_stop(struct as_data *ad);
@@ -164,8 +165,19 @@ static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
elv_ioc_count_dec(ioc_count);
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
+ if (ioc_gone) {
+ /*
+ * AS scheduler is exiting, grab exit lock and check
+ * the pending io context count. If it hits zero,
+ * complete ioc_gone and set it back to NULL.
+ */
+ spin_lock(&ioc_gone_lock);
+ if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ complete(ioc_gone);
+ ioc_gone = NULL;
+ }
+ spin_unlock(&ioc_gone_lock);
+ }
}
static void as_trim(struct io_context *ioc)
@@ -1493,7 +1505,7 @@ static void __exit as_exit(void)
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
+ wait_for_completion(&all_gone);
synchronize_rcu();
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 1905aaba49fb..dbc7f42b5d2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio->bi_size -= nbytes;
bio->bi_sector += (nbytes >> 9);
+
+ if (bio_integrity(bio))
+ bio_integrity_advance(bio, nbytes);
+
if (bio->bi_size == 0)
bio_endio(bio, error);
} else {
@@ -201,8 +205,7 @@ void blk_plug_device(struct request_queue *q)
if (blk_queue_stopped(q))
return;
- if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
- __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
}
@@ -217,10 +220,9 @@ int blk_remove_plug(struct request_queue *q)
{
WARN_ON(!irqs_disabled());
- if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+ if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
return 0;
- queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
del_timer(&q->unplug_timer);
return 1;
}
@@ -324,8 +326,7 @@ void blk_start_queue(struct request_queue *q)
* one level of recursion is ok and is much faster than kicking
* the unplug handling
*/
- if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
- queue_flag_set(QUEUE_FLAG_REENTER, q);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -390,8 +391,7 @@ void __blk_run_queue(struct request_queue *q)
* handling reinvoke the handler shortly if we already got there.
*/
if (!elv_queue_empty(q)) {
- if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
- queue_flag_set(QUEUE_FLAG_REENTER, q);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -1381,6 +1381,9 @@ end_io:
*/
blk_partition_remap(bio);
+ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+ goto end_io;
+
if (old_sector != -1)
blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644
index 000000000000..3f1a8478cc38
--- /dev/null
+++ b/block/blk-integrity.c
@@ -0,0 +1,381 @@
+/*
+ * blk-integrity.c - Block layer data integrity extensions
+ *
+ * Copyright (C) 2007, 2008 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/bio.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+static struct kmem_cache *integrity_cachep;
+
+/**
+ * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
+ * @rq: request with integrity metadata attached
+ *
+ * Description: Returns the number of elements required in a
+ * scatterlist corresponding to the integrity metadata in a request.
+ */
+int blk_rq_count_integrity_sg(struct request *rq)
+{
+ struct bio_vec *iv, *ivprv;
+ struct req_iterator iter;
+ unsigned int segments;
+
+ ivprv = NULL;
+ segments = 0;
+
+ rq_for_each_integrity_segment(iv, rq, iter) {
+
+ if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+ segments++;
+
+ ivprv = iv;
+ }
+
+ return segments;
+}
+EXPORT_SYMBOL(blk_rq_count_integrity_sg);
+
+/**
+ * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
+ * @rq: request with integrity metadata attached
+ * @sglist: target scatterlist
+ *
+ * Description: Map the integrity vectors in request into a
+ * scatterlist. The scatterlist must be big enough to hold all
+ * elements. I.e. sized using blk_rq_count_integrity_sg().
+ */
+int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
+{
+ struct bio_vec *iv, *ivprv;
+ struct req_iterator iter;
+ struct scatterlist *sg;
+ unsigned int segments;
+
+ ivprv = NULL;
+ sg = NULL;
+ segments = 0;
+
+ rq_for_each_integrity_segment(iv, rq, iter) {
+
+ if (ivprv) {
+ if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+ goto new_segment;
+
+ sg->length += iv->bv_len;
+ } else {
+new_segment:
+ if (!sg)
+ sg = sglist;
+ else {
+ sg->page_link &= ~0x02;
+ sg = sg_next(sg);
+ }
+
+ sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
+ segments++;
+ }
+
+ ivprv = iv;
+ }
+
+ if (sg)
+ sg_mark_end(sg);
+
+ return segments;
+}
+EXPORT_SYMBOL(blk_rq_map_integrity_sg);
+
+/**
+ * blk_integrity_compare - Compare integrity profile of two block devices
+ * @b1: Device to compare
+ * @b2: Device to compare
+ *
+ * Description: Meta-devices like DM and MD need to verify that all
+ * sub-devices use the same integrity format before advertising to
+ * upper layers that they can send/receive integrity metadata. This
+ * function can be used to check whether two block devices have
+ * compatible integrity formats.
+ */
+int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+{
+ struct blk_integrity *b1 = bd1->bd_disk->integrity;
+ struct blk_integrity *b2 = bd2->bd_disk->integrity;
+
+ BUG_ON(bd1->bd_disk == NULL);
+ BUG_ON(bd2->bd_disk == NULL);
+
+ if (!b1 || !b2)
+ return 0;
+
+ if (b1->sector_size != b2->sector_size) {
+ printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->sector_size, b2->sector_size);
+ return -1;
+ }
+
+ if (b1->tuple_size != b2->tuple_size) {
+ printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->tuple_size, b2->tuple_size);
+ return -1;
+ }
+
+ if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
+ printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->tag_size, b2->tag_size);
+ return -1;
+ }
+
+ if (strcmp(b1->name, b2->name)) {
+ printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->name, b2->name);
+ return -1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(blk_integrity_compare);
+
+struct integrity_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_integrity *, char *);
+ ssize_t (*store)(struct blk_integrity *, const char *, size_t);
+};
+
+static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *page)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+ struct integrity_sysfs_entry *entry =
+ container_of(attr, struct integrity_sysfs_entry, attr);
+
+ return entry->show(bi, page);
+}
+
+static ssize_t integrity_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *page,
+ size_t count)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+ struct integrity_sysfs_entry *entry =
+ container_of(attr, struct integrity_sysfs_entry, attr);
+ ssize_t ret = 0;
+
+ if (entry->store)
+ ret = entry->store(bi, page, count);
+
+ return ret;
+}
+
+static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
+{
+ if (bi != NULL && bi->name != NULL)
+ return sprintf(page, "%s\n", bi->name);
+ else
+ return sprintf(page, "none\n");
+}
+
+static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
+{
+ if (bi != NULL)
+ return sprintf(page, "%u\n", bi->tag_size);
+ else
+ return sprintf(page, "0\n");
+}
+
+static ssize_t integrity_read_store(struct blk_integrity *bi,
+ const char *page, size_t count)
+{
+ char *p = (char *) page;
+ unsigned long val = simple_strtoul(p, &p, 10);
+
+ if (val)
+ bi->flags |= INTEGRITY_FLAG_READ;
+ else
+ bi->flags &= ~INTEGRITY_FLAG_READ;
+
+ return count;
+}
+
+static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
+{
+ return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
+}
+
+static ssize_t integrity_write_store(struct blk_integrity *bi,
+ const char *page, size_t count)
+{
+ char *p = (char *) page;
+ unsigned long val = simple_strtoul(p, &p, 10);
+
+ if (val)
+ bi->flags |= INTEGRITY_FLAG_WRITE;
+ else
+ bi->flags &= ~INTEGRITY_FLAG_WRITE;
+
+ return count;
+}
+
+static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
+{
+ return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
+}
+
+static struct integrity_sysfs_entry integrity_format_entry = {
+ .attr = { .name = "format", .mode = S_IRUGO },
+ .show = integrity_format_show,
+};
+
+static struct integrity_sysfs_entry integrity_tag_size_entry = {
+ .attr = { .name = "tag_size", .mode = S_IRUGO },
+ .show = integrity_tag_size_show,
+};
+
+static struct integrity_sysfs_entry integrity_read_entry = {
+ .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+ .show = integrity_read_show,
+ .store = integrity_read_store,
+};
+
+static struct integrity_sysfs_entry integrity_write_entry = {
+ .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+ .show = integrity_write_show,
+ .store = integrity_write_store,
+};
+
+static struct attribute *integrity_attrs[] = {
+ &integrity_format_entry.attr,
+ &integrity_tag_size_entry.attr,
+ &integrity_read_entry.attr,
+ &integrity_write_entry.attr,
+ NULL,
+};
+
+static struct sysfs_ops integrity_ops = {
+ .show = &integrity_attr_show,
+ .store = &integrity_attr_store,
+};
+
+static int __init blk_dev_integrity_init(void)
+{
+ integrity_cachep = kmem_cache_create("blkdev_integrity",
+ sizeof(struct blk_integrity),
+ 0, SLAB_PANIC, NULL);
+ return 0;
+}
+subsys_initcall(blk_dev_integrity_init);
+
+static void blk_integrity_release(struct kobject *kobj)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+
+ kmem_cache_free(integrity_cachep, bi);
+}
+
+static struct kobj_type integrity_ktype = {
+ .default_attrs = integrity_attrs,
+ .sysfs_ops = &integrity_ops,
+ .release = blk_integrity_release,
+};
+
+/**
+ * blk_integrity_register - Register a gendisk as being integrity-capable
+ * @disk: struct gendisk pointer to make integrity-aware
+ * @template: integrity profile
+ *
+ * Description: When a device needs to advertise itself as being able
+ * to send/receive integrity metadata it must use this function to
+ * register the capability with the block layer. The template is a
+ * blk_integrity struct with values appropriate for the underlying
+ * hardware. See Documentation/block/data-integrity.txt.
+ */
+int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
+{
+ struct blk_integrity *bi;
+
+ BUG_ON(disk == NULL);
+ BUG_ON(template == NULL);
+
+ if (disk->integrity == NULL) {
+ bi = kmem_cache_alloc(integrity_cachep,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi)
+ return -1;
+
+ if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
+ &disk->dev.kobj, "%s", "integrity")) {
+ kmem_cache_free(integrity_cachep, bi);
+ return -1;
+ }
+
+ kobject_uevent(&bi->kobj, KOBJ_ADD);
+
+ bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
+ bi->sector_size = disk->queue->hardsect_size;
+ disk->integrity = bi;
+ } else
+ bi = disk->integrity;
+
+ /* Use the provided profile as template */
+ bi->name = template->name;
+ bi->generate_fn = template->generate_fn;
+ bi->verify_fn = template->verify_fn;
+ bi->tuple_size = template->tuple_size;
+ bi->set_tag_fn = template->set_tag_fn;
+ bi->get_tag_fn = template->get_tag_fn;
+ bi->tag_size = template->tag_size;
+
+ return 0;
+}
+EXPORT_SYMBOL(blk_integrity_register);
+
+/**
+ * blk_integrity_unregister - Remove block integrity profile
+ * @disk: disk whose integrity profile to deallocate
+ *
+ * Description: This function frees all memory used by the block
+ * integrity profile. To be called at device teardown.
+ */
+void blk_integrity_unregister(struct gendisk *disk)
+{
+ struct blk_integrity *bi;
+
+ if (!disk || !disk->integrity)
+ return;
+
+ bi = disk->integrity;
+
+ kobject_uevent(&bi->kobj, KOBJ_REMOVE);
+ kobject_del(&bi->kobj);
+ kobject_put(&disk->dev.kobj);
+ kmem_cache_free(integrity_cachep, bi);
+}
+EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-map.c b/block/blk-map.c
index 0b1af5a3537c..ddd96fb11a7d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -210,6 +210,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
if (!bio_flagged(bio, BIO_USER_MAPPED))
rq->cmd_flags |= REQ_COPY_USER;
+ blk_queue_bounce(q, &bio);
bio_get(bio);
blk_rq_bio_prep(q, rq, bio);
rq->buffer = rq->data = NULL;
@@ -268,6 +269,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
int reading = rq_data_dir(rq) == READ;
int do_copy = 0;
struct bio *bio;
+ unsigned long stack_mask = ~(THREAD_SIZE - 1);
if (len > (q->max_hw_sectors << 9))
return -EINVAL;
@@ -278,6 +280,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
alignment = queue_dma_alignment(q) | q->dma_pad_mask;
do_copy = ((kaddr & alignment) || (len & alignment));
+ if (!((kaddr & stack_mask) ^
+ ((unsigned long)current->stack & stack_mask)))
+ do_copy = 1;
+
if (do_copy)
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 651136aae76e..5efc9e7a68b7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
+ if (blk_integrity_rq(req) != blk_integrity_rq(next))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8dd86418f35d..dfc77012843f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -302,11 +302,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
* @q: the request queue for the device
* @mask: pad mask
*
- * Set pad mask. Direct IO requests are padded to the mask specified.
+ * Set dma pad mask.
*
- * Appending pad buffer to a request modifies ->data_len such that it
- * includes the pad buffer. The original requested data length can be
- * obtained using blk_rq_raw_data_len().
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
**/
void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
{
@@ -315,6 +314,23 @@ void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
EXPORT_SYMBOL(blk_queue_dma_pad);
/**
+ * blk_queue_update_dma_pad - update pad mask
+ * @q: the request queue for the device
+ * @mask: pad mask
+ *
+ * Update dma pad mask.
+ *
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
+ **/
+void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
+{
+ if (mask > q->dma_pad_mask)
+ q->dma_pad_mask = mask;
+}
+EXPORT_SYMBOL(blk_queue_update_dma_pad);
+
+/**
* blk_queue_dma_drain - Set up a drain buffer for excess dma.
* @q: the request queue for the device
* @dma_drain_needed: fn which returns non-zero if drain is necessary
diff --git a/block/blk.h b/block/blk.h
index 59776ab4742a..c79f30e1df52 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
return q->nr_congestion_off;
}
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+#define rq_for_each_integrity_segment(bvl, _rq, _iter) \
+ __rq_for_each_bio(_iter.bio, _rq) \
+ bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
+
+#endif /* BLK_DEV_INTEGRITY */
+
#endif
diff --git a/block/blktrace.c b/block/blktrace.c
index 8d3a27780260..eb9651ccb241 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -244,6 +244,7 @@ err:
static void blk_trace_cleanup(struct blk_trace *bt)
{
relay_close(bt->rchan);
+ debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
blk_remove_tree(bt->dir);
free_percpu(bt->sequence);
@@ -291,6 +292,44 @@ static const struct file_operations blk_dropped_fops = {
.read = blk_dropped_read,
};
+static int blk_msg_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char *msg;
+ struct blk_trace *bt;
+
+ if (count > BLK_TN_MAX_MSG)
+ return -EINVAL;
+
+ msg = kmalloc(count, GFP_KERNEL);
+ if (msg == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(msg, buffer, count)) {
+ kfree(msg);
+ return -EFAULT;
+ }
+
+ bt = filp->private_data;
+ __trace_note_message(bt, "%s", msg);
+ kfree(msg);
+
+ return count;
+}
+
+static const struct file_operations blk_msg_fops = {
+ .owner = THIS_MODULE,
+ .open = blk_msg_open,
+ .write = blk_msg_write,
+};
+
/*
* Keep track of how many times we encountered a full subbuffer, to aid
* the user space app in telling how many lost events there were.
@@ -380,6 +419,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->dropped_file)
goto err;
+ bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+ if (!bt->msg_file)
+ goto err;
+
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
if (!bt->rchan)
@@ -409,6 +452,8 @@ err:
if (dir)
blk_remove_tree(dir);
if (bt) {
+ if (bt->msg_file)
+ debugfs_remove(bt->msg_file);
if (bt->dropped_file)
debugfs_remove(bt->dropped_file);
free_percpu(bt->sequence);
diff --git a/block/bsg.c b/block/bsg.c
index 54d617f7df3e..93e757d7174b 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+ bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -772,7 +786,9 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d01b411c72f0..1e2aff812ee2 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -11,6 +11,7 @@
#include <linux/elevator.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
+#include <linux/blktrace_api.h>
/*
* tunables
@@ -41,13 +42,14 @@ static int cfq_slice_idle = HZ / 125;
#define RQ_CIC(rq) \
((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq) ((rq)->elevator_private2)
+#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;
static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
#define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -155,6 +157,7 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
+ pid_t pid;
};
enum cfqq_state_flags {
@@ -198,6 +201,11 @@ CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
#undef CFQ_CFQQ_FNS
+#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
+ blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
+#define cfq_log(cfqd, fmt, args...) \
+ blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
+
static void cfq_dispatch_insert(struct request_queue *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
struct io_context *, gfp_t);
@@ -234,8 +242,10 @@ static inline int cfq_bio_sync(struct bio *bio)
*/
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
- if (cfqd->busy_queues)
+ if (cfqd->busy_queues) {
+ cfq_log(cfqd, "schedule dispatch");
kblockd_schedule_work(&cfqd->unplug_work);
+ }
}
static int cfq_queue_empty(struct request_queue *q)
@@ -270,6 +280,7 @@ static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
}
/*
@@ -539,6 +550,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
BUG_ON(cfq_cfqq_on_rr(cfqq));
cfq_mark_cfqq_on_rr(cfqq);
cfqd->busy_queues++;
@@ -552,6 +564,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
BUG_ON(!cfq_cfqq_on_rr(cfqq));
cfq_clear_cfqq_on_rr(cfqq);
@@ -638,6 +651,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
cfqd->rq_in_driver++;
+ cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
+ cfqd->rq_in_driver);
/*
* If the depth is larger 1, it really could be queueing. But lets
@@ -657,6 +672,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
WARN_ON(!cfqd->rq_in_driver);
cfqd->rq_in_driver--;
+ cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
+ cfqd->rq_in_driver);
}
static void cfq_remove_request(struct request *rq)
@@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
if (cfqq) {
+ cfq_log_cfqq(cfqd, cfqq, "set_active");
cfqq->slice_end = 0;
cfq_clear_cfqq_must_alloc_slice(cfqq);
cfq_clear_cfqq_fifo_expire(cfqq);
@@ -763,6 +781,8 @@ static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
int timed_out)
{
+ cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
+
if (cfq_cfqq_wait_request(cfqq))
del_timer(&cfqd->idle_slice_timer);
@@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
/*
* store what was left of this slice, if the queue idled/timed out
*/
- if (timed_out && !cfq_cfqq_slice_new(cfqq))
+ if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
cfqq->slice_resid = cfqq->slice_end - jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
+ }
cfq_resort_rr_list(cfqd, cfqq);
@@ -866,6 +888,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
return;
/*
+ * still requests with the driver, don't idle
+ */
+ if (cfqd->rq_in_driver)
+ return;
+
+ /*
* task has exited, don't wait
*/
cic = cfqd->active_cic;
@@ -892,6 +920,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+ cfq_log(cfqd, "arm_idle: %lu", sl);
}
/*
@@ -902,6 +931,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq = RQ_CFQQ(rq);
+ cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
+
cfq_remove_request(rq);
cfqq->dispatched++;
elv_dispatch_sort(q, rq);
@@ -931,8 +962,9 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
rq = rq_entry_fifo(cfqq->fifo.next);
if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
- return NULL;
+ rq = NULL;
+ cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
return rq;
}
@@ -1072,6 +1104,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
BUG_ON(cfqd->busy_queues);
+ cfq_log(cfqd, "forced_dispatch=%d\n", dispatched);
return dispatched;
}
@@ -1112,6 +1145,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
}
+ cfq_log(cfqd, "dispatched=%d", dispatched);
return dispatched;
}
@@ -1130,6 +1164,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
if (!atomic_dec_and_test(&cfqq->ref))
return;
+ cfq_log_cfqq(cfqd, cfqq, "put_queue");
BUG_ON(rb_first(&cfqq->sort_list));
BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
BUG_ON(cfq_cfqq_on_rr(cfqq));
@@ -1177,8 +1212,19 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
kmem_cache_free(cfq_ioc_pool, cic);
elv_ioc_count_dec(ioc_count);
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
+ if (ioc_gone) {
+ /*
+ * CFQ scheduler is exiting, grab exit lock and check
+ * the pending io context count. If it hits zero,
+ * complete ioc_gone and set it back to NULL
+ */
+ spin_lock(&ioc_gone_lock);
+ if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ complete(ioc_gone);
+ ioc_gone = NULL;
+ }
+ spin_unlock(&ioc_gone_lock);
+ }
}
static void cfq_cic_free(struct cfq_io_context *cic)
@@ -1427,6 +1473,8 @@ retry:
cfq_mark_cfqq_idle_window(cfqq);
cfq_mark_cfqq_sync(cfqq);
}
+ cfqq->pid = current->pid;
+ cfq_log_cfqq(cfqd, cfqq, "alloced");
}
if (new_cfqq)
@@ -1675,7 +1723,7 @@ static void
cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic)
{
- int enable_idle;
+ int old_idle, enable_idle;
/*
* Don't idle for async or idle io prio class
@@ -1683,7 +1731,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
return;
- enable_idle = cfq_cfqq_idle_window(cfqq);
+ enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
(cfqd->hw_tag && CIC_SEEKY(cic)))
@@ -1695,10 +1743,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
enable_idle = 1;
}
- if (enable_idle)
- cfq_mark_cfqq_idle_window(cfqq);
- else
- cfq_clear_cfqq_idle_window(cfqq);
+ if (old_idle != enable_idle) {
+ cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
+ if (enable_idle)
+ cfq_mark_cfqq_idle_window(cfqq);
+ else
+ cfq_clear_cfqq_idle_window(cfqq);
+ }
}
/*
@@ -1757,6 +1808,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "preempt");
cfq_slice_expired(cfqd, 1);
/*
@@ -1818,6 +1870,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq = RQ_CFQQ(rq);
+ cfq_log_cfqq(cfqd, cfqq, "insert_request");
cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
cfq_add_rq_rb(rq);
@@ -1835,6 +1888,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
unsigned long now;
now = jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "complete");
WARN_ON(!cfqd->rq_in_driver);
WARN_ON(!cfqq->dispatched);
@@ -2004,6 +2058,7 @@ queue_fail:
cfq_schedule_dispatch(cfqd);
spin_unlock_irqrestore(q->queue_lock, flags);
+ cfq_log(cfqd, "set_request fail");
return 1;
}
@@ -2029,6 +2084,8 @@ static void cfq_idle_slice_timer(unsigned long data)
unsigned long flags;
int timed_out = 1;
+ cfq_log(cfqd, "idle timer fired");
+
spin_lock_irqsave(cfqd->queue->queue_lock, flags);
cfqq = cfqd->active_queue;
@@ -2317,7 +2374,7 @@ static void __exit cfq_exit(void)
* pending RCU callbacks
*/
if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
+ wait_for_completion(&all_gone);
cfq_slab_kill();
}
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 000000000000..eec4404fd357
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode)
+{
+ /* root can do any command. */
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
+ /* if there's no filter set, assume we're filtering everything out */
+ if (!filter)
+ return -EPERM;
+
+ /* Anybody who can open the device can do a read-safe command */
+ if (test_bit(cmd[0], filter->read_ok))
+ return 0;
+
+ /* Write-safe commands require a writable open */
+ if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+ return 0;
+
+ return -EPERM;
+}
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+ struct gendisk *disk;
+ struct inode *inode;
+
+ if (!file)
+ return -EINVAL;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return -EINVAL;
+
+ disk = inode->i_bdev->bd_disk;
+
+ return blk_cmd_filter_verify_command(&disk->cmd_filter,
+ cmd, &file->f_mode);
+}
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+ int rw)
+{
+ char *npage = page;
+ unsigned long *okbits;
+ int i;
+
+ if (rw == READ)
+ okbits = filter->read_ok;
+ else
+ okbits = filter->write_ok;
+
+ for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+ if (test_bit(i, okbits)) {
+ sprintf(npage, "%02x", i);
+ npage += 2;
+ if (i < BLK_SCSI_MAX_CMDS - 1)
+ sprintf(npage++, " ");
+ }
+ }
+
+ if (npage != page)
+ npage += sprintf(npage, "\n");
+
+ return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter, char *page)
+{
+ return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+ char *page)
+{
+ return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count, int rw)
+{
+ ssize_t ret = 0;
+ unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+ int cmd, status, len;
+ substring_t ss;
+
+ memset(&okbits, 0, sizeof(okbits));
+
+ for (len = strlen(page); len > 0; len -= 3) {
+ if (len < 2)
+ break;
+ ss.from = (char *) page + ret;
+ ss.to = (char *) page + ret + 2;
+ ret += 3;
+ status = match_hex(&ss, &cmd);
+ /* either of these cases means invalid input, so do nothing. */
+ if (status || cmd >= BLK_SCSI_MAX_CMDS)
+ return -EINVAL;
+
+ __set_bit(cmd, okbits);
+ }
+
+ if (rw == READ)
+ target_okbits = filter->read_ok;
+ else
+ target_okbits = filter->write_ok;
+
+ memmove(target_okbits, okbits, sizeof(okbits));
+ return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+ const char *page, size_t count)
+{
+ return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+ ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+ .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_readcmds_show,
+ .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+ .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+ .show = rcf_writecmds_show,
+ .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+ &rcf_readcmds_entry.attr,
+ &rcf_writecmds_entry.attr,
+ NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ if (entry->show)
+ return entry->show(filter, page);
+
+ return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *page, size_t length)
+{
+ struct rcf_sysfs_entry *entry = to_rcf(attr);
+ struct blk_scsi_cmd_filter *filter;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!entry->store)
+ return -EINVAL;
+
+ filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+ return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+ .show = rcf_attr_show,
+ .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+ .sysfs_ops = &rcf_sysfs_ops,
+ .default_attrs = default_attrs,
+};
+
+#ifndef MAINTENANCE_IN_CMD
+#define MAINTENANCE_IN_CMD 0xa3
+#endif
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+ /* Basic read-only commands */
+ __set_bit(TEST_UNIT_READY, filter->read_ok);
+ __set_bit(REQUEST_SENSE, filter->read_ok);
+ __set_bit(READ_6, filter->read_ok);
+ __set_bit(READ_10, filter->read_ok);
+ __set_bit(READ_12, filter->read_ok);
+ __set_bit(READ_16, filter->read_ok);
+ __set_bit(READ_BUFFER, filter->read_ok);
+ __set_bit(READ_DEFECT_DATA, filter->read_ok);
+ __set_bit(READ_CAPACITY, filter->read_ok);
+ __set_bit(READ_LONG, filter->read_ok);
+ __set_bit(INQUIRY, filter->read_ok);
+ __set_bit(MODE_SENSE, filter->read_ok);
+ __set_bit(MODE_SENSE_10, filter->read_ok);
+ __set_bit(LOG_SENSE, filter->read_ok);
+ __set_bit(START_STOP, filter->read_ok);
+ __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+ __set_bit(VERIFY_16, filter->read_ok);
+ __set_bit(REPORT_LUNS, filter->read_ok);
+ __set_bit(SERVICE_ACTION_IN, filter->read_ok);
+ __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
+ __set_bit(MAINTENANCE_IN_CMD, filter->read_ok);
+ __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+ /* Audio CD commands */
+ __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+ __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+ __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+ /* CD/DVD data reading */
+ __set_bit(GPCMD_READ_CD, filter->read_ok);
+ __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+ __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+ __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+ __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+ __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+ __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+ __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+ __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+ __set_bit(GPCMD_SCAN, filter->read_ok);
+ __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+ __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+ __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+ __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+ __set_bit(GPCMD_SEEK, filter->read_ok);
+ __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+ /* Basic writing commands */
+ __set_bit(WRITE_6, filter->write_ok);
+ __set_bit(WRITE_10, filter->write_ok);
+ __set_bit(WRITE_VERIFY, filter->write_ok);
+ __set_bit(WRITE_12, filter->write_ok);
+ __set_bit(WRITE_VERIFY_12, filter->write_ok);
+ __set_bit(WRITE_16, filter->write_ok);
+ __set_bit(WRITE_LONG, filter->write_ok);
+ __set_bit(WRITE_LONG_2, filter->write_ok);
+ __set_bit(ERASE, filter->write_ok);
+ __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+ __set_bit(MODE_SELECT, filter->write_ok);
+ __set_bit(LOG_SELECT, filter->write_ok);
+ __set_bit(GPCMD_BLANK, filter->write_ok);
+ __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+ __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+ __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+ __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+ __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+ __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+ __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+ __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+ __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+ __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+ __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+ __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+ int ret;
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+ struct kobject *parent = kobject_get(disk->holder_dir->parent);
+
+ if (!parent)
+ return -ENODEV;
+
+ ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+ "%s", "cmd_filter");
+
+ if (ret < 0)
+ return ret;
+
+ rcf_set_defaults(filter);
+ return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+ struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+ kobject_put(&filter->kobj);
+ kobject_put(disk->holder_dir->parent);
+}
+
diff --git a/block/elevator.c b/block/elevator.c
index 902dd1344d56..ed6f8f32d27e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
return 0;
+ /*
+ * only merge integrity protected bio into ditto rq
+ */
+ if (bio_integrity(bio) != blk_integrity_rq(rq))
+ return 0;
+
if (!elv_iosched_allow_merge(rq, bio))
return 0;
@@ -144,7 +150,7 @@ static struct elevator_type *elevator_get(const char *name)
else
sprintf(elv, "%s-iosched", name);
- request_module(elv);
+ request_module("%s", elv);
spin_lock(&elv_list_lock);
e = elevator_find(name);
}
diff --git a/block/genhd.c b/block/genhd.c
index b922d4801c87..9074f384b097 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
disk->minors, NULL, exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
+ blk_register_filter(disk);
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
+ blk_unregister_filter(disk);
sysfs_remove_link(&disk->dev.kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
@@ -400,6 +402,14 @@ static ssize_t disk_removable_show(struct device *dev,
(disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
}
+static ssize_t disk_ro_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
+}
+
static ssize_t disk_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -472,6 +482,7 @@ static ssize_t disk_fail_store(struct device *dev,
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
+static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
@@ -483,6 +494,7 @@ static struct device_attribute dev_attr_fail =
static struct attribute *disk_attrs[] = {
&dev_attr_range.attr,
&dev_attr_removable.attr,
+ &dev_attr_ro.attr,
&dev_attr_size.attr,
&dev_attr_capability.attr,
&dev_attr_stat.attr,
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c08ec92..c5b9bcfc0a6d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
return put_user(1, p);
}
-#define CMD_READ_SAFE 0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED 0x04
-#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
- static unsigned char cmd_type[256] = {
-
- /* Basic read-only commands */
- safe_for_read(TEST_UNIT_READY),
- safe_for_read(REQUEST_SENSE),
- safe_for_read(READ_6),
- safe_for_read(READ_10),
- safe_for_read(READ_12),
- safe_for_read(READ_16),
- safe_for_read(READ_BUFFER),
- safe_for_read(READ_DEFECT_DATA),
- safe_for_read(READ_LONG),
- safe_for_read(INQUIRY),
- safe_for_read(MODE_SENSE),
- safe_for_read(MODE_SENSE_10),
- safe_for_read(LOG_SENSE),
- safe_for_read(START_STOP),
- safe_for_read(GPCMD_VERIFY_10),
- safe_for_read(VERIFY_16),
-
- /* Audio CD commands */
- safe_for_read(GPCMD_PLAY_CD),
- safe_for_read(GPCMD_PLAY_AUDIO_10),
- safe_for_read(GPCMD_PLAY_AUDIO_MSF),
- safe_for_read(GPCMD_PLAY_AUDIO_TI),
- safe_for_read(GPCMD_PAUSE_RESUME),
-
- /* CD/DVD data reading */
- safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
- safe_for_read(GPCMD_READ_CD),
- safe_for_read(GPCMD_READ_CD_MSF),
- safe_for_read(GPCMD_READ_DISC_INFO),
- safe_for_read(GPCMD_READ_CDVD_CAPACITY),
- safe_for_read(GPCMD_READ_DVD_STRUCTURE),
- safe_for_read(GPCMD_READ_HEADER),
- safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
- safe_for_read(GPCMD_READ_SUBCHANNEL),
- safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
- safe_for_read(GPCMD_REPORT_KEY),
- safe_for_read(GPCMD_SCAN),
- safe_for_read(GPCMD_GET_CONFIGURATION),
- safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
- safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
- safe_for_read(GPCMD_GET_PERFORMANCE),
- safe_for_read(GPCMD_SEEK),
- safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
- /* Basic writing commands */
- safe_for_write(WRITE_6),
- safe_for_write(WRITE_10),
- safe_for_write(WRITE_VERIFY),
- safe_for_write(WRITE_12),
- safe_for_write(WRITE_VERIFY_12),
- safe_for_write(WRITE_16),
- safe_for_write(WRITE_LONG),
- safe_for_write(WRITE_LONG_2),
- safe_for_write(ERASE),
- safe_for_write(GPCMD_MODE_SELECT_10),
- safe_for_write(MODE_SELECT),
- safe_for_write(LOG_SELECT),
- safe_for_write(GPCMD_BLANK),
- safe_for_write(GPCMD_CLOSE_TRACK),
- safe_for_write(GPCMD_FLUSH_CACHE),
- safe_for_write(GPCMD_FORMAT_UNIT),
- safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
- safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
- safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
- safe_for_write(GPCMD_SEND_EVENT),
- safe_for_write(GPCMD_SEND_KEY),
- safe_for_write(GPCMD_SEND_OPC),
- safe_for_write(GPCMD_SEND_CUE_SHEET),
- safe_for_write(GPCMD_SET_SPEED),
- safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
- safe_for_write(GPCMD_LOAD_UNLOAD),
- safe_for_write(GPCMD_SET_STREAMING),
- };
- unsigned char type = cmd_type[cmd[0]];
-
- /* Anybody who can open the device can do a read-safe command */
- if (type & CMD_READ_SAFE)
- return 0;
-
- /* Write-safe commands just require a writable open.. */
- if ((type & CMD_WRITE_SAFE) && has_write_perm)
- return 0;
-
- /* And root can do any command.. */
- if (capable(CAP_SYS_RAWIO))
- return 0;
-
- if (!type) {
- cmd_type[cmd[0]] = CMD_WARNED;
- printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
- }
-
- /* Otherwise fail it with an "Operation not permitted" */
- return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_hdr *hdr, int has_write_perm)
+ struct sg_io_hdr *hdr, struct file *file)
{
if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
return -EFAULT;
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_verify_command(file, rq->cmd))
return -EPERM;
/*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
struct gendisk *bd_disk, struct sg_io_hdr *hdr)
{
unsigned long start_time;
- int writing = 0, ret = 0, has_write_perm = 0;
+ int writing = 0, ret = 0;
struct request *rq;
char sense[SCSI_SENSE_BUFFERSIZE];
struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct request_queue *q,
if (!rq)
return -ENOMEM;
- if (file)
- has_write_perm = file->f_mode & FMODE_WRITE;
-
- if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+ if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
blk_put_request(rq);
return -EFAULT;
}
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
goto error;
- err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+ err = blk_verify_command(file, rq->cmd);
if (err)
goto error;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 57a43649a461..499ccc628d81 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -885,7 +885,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
/* set the min alignment and padding */
blk_queue_update_dma_alignment(sdev->request_queue,
ATA_DMA_PAD_SZ - 1);
- blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1);
+ blk_queue_update_dma_pad(sdev->request_queue,
+ ATA_DMA_PAD_SZ - 1);
/* configure draining */
buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index cd03473f3547..a002a381df92 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6628,15 +6628,18 @@ static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller)
* DAC960_gam_ioctl is the ioctl function for performing RAID operations.
*/
-static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
- unsigned int Request, unsigned long Argument)
+static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
+ unsigned long Argument)
{
- int ErrorCode = 0;
+ long ErrorCode = 0;
if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+
+ lock_kernel();
switch (Request)
{
case DAC960_IOCTL_GET_CONTROLLER_COUNT:
- return DAC960_ControllerCount;
+ ErrorCode = DAC960_ControllerCount;
+ break;
case DAC960_IOCTL_GET_CONTROLLER_INFO:
{
DAC960_ControllerInfo_T __user *UserSpaceControllerInfo =
@@ -6644,15 +6647,20 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
DAC960_ControllerInfo_T ControllerInfo;
DAC960_Controller_T *Controller;
int ControllerNumber;
- if (UserSpaceControllerInfo == NULL) return -EINVAL;
- ErrorCode = get_user(ControllerNumber,
+ if (UserSpaceControllerInfo == NULL)
+ ErrorCode = -EINVAL;
+ else ErrorCode = get_user(ControllerNumber,
&UserSpaceControllerInfo->ControllerNumber);
- if (ErrorCode != 0) return ErrorCode;
+ if (ErrorCode != 0)
+ break;;
+ ErrorCode = -ENXIO;
if (ControllerNumber < 0 ||
- ControllerNumber > DAC960_ControllerCount - 1)
- return -ENXIO;
+ ControllerNumber > DAC960_ControllerCount - 1) {
+ break;
+ }
Controller = DAC960_Controllers[ControllerNumber];
- if (Controller == NULL) return -ENXIO;
+ if (Controller == NULL)
+ break;;
memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
ControllerInfo.ControllerNumber = ControllerNumber;
ControllerInfo.FirmwareType = Controller->FirmwareType;
@@ -6665,8 +6673,9 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
ControllerInfo.PCI_Address = Controller->PCI_Address;
strcpy(ControllerInfo.ModelName, Controller->ModelName);
strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion);
- return (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
+ ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
sizeof(DAC960_ControllerInfo_T)) ? -EFAULT : 0);
+ break;
}
case DAC960_IOCTL_V1_EXECUTE_COMMAND:
{
@@ -6684,30 +6693,39 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
int ControllerNumber, DataTransferLength;
unsigned char *DataTransferBuffer = NULL;
dma_addr_t DataTransferBufferDMA;
- if (UserSpaceUserCommand == NULL) return -EINVAL;
+ if (UserSpaceUserCommand == NULL) {
+ ErrorCode = -EINVAL;
+ break;
+ }
if (copy_from_user(&UserCommand, UserSpaceUserCommand,
sizeof(DAC960_V1_UserCommand_T))) {
ErrorCode = -EFAULT;
- goto Failure1a;
+ break;
}
ControllerNumber = UserCommand.ControllerNumber;
+ ErrorCode = -ENXIO;
if (ControllerNumber < 0 ||
ControllerNumber > DAC960_ControllerCount - 1)
- return -ENXIO;
+ break;
Controller = DAC960_Controllers[ControllerNumber];
- if (Controller == NULL) return -ENXIO;
- if (Controller->FirmwareType != DAC960_V1_Controller) return -EINVAL;
+ if (Controller == NULL)
+ break;
+ ErrorCode = -EINVAL;
+ if (Controller->FirmwareType != DAC960_V1_Controller)
+ break;
CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode;
DataTransferLength = UserCommand.DataTransferLength;
- if (CommandOpcode & 0x80) return -EINVAL;
+ if (CommandOpcode & 0x80)
+ break;
if (CommandOpcode == DAC960_V1_DCDB)
{
if (copy_from_user(&DCDB, UserCommand.DCDB,
sizeof(DAC960_V1_DCDB_T))) {
ErrorCode = -EFAULT;
- goto Failure1a;
+ break;
}
- if (DCDB.Channel >= DAC960_V1_MaxChannels) return -EINVAL;
+ if (DCDB.Channel >= DAC960_V1_MaxChannels)
+ break;
if (!((DataTransferLength == 0 &&
DCDB.Direction
== DAC960_V1_DCDB_NoDataTransfer) ||
@@ -6717,38 +6735,37 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
(DataTransferLength < 0 &&
DCDB.Direction
== DAC960_V1_DCDB_DataTransferSystemToDevice)))
- return -EINVAL;
+ break;
if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength)
!= abs(DataTransferLength))
- return -EINVAL;
+ break;
DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice,
sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA);
- if (DCDB_IOBUF == NULL)
- return -ENOMEM;
+ if (DCDB_IOBUF == NULL) {
+ ErrorCode = -ENOMEM;
+ break;
+ }
}
+ ErrorCode = -ENOMEM;
if (DataTransferLength > 0)
{
DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
DataTransferLength, &DataTransferBufferDMA);
- if (DataTransferBuffer == NULL) {
- ErrorCode = -ENOMEM;
- goto Failure1;
- }
+ if (DataTransferBuffer == NULL)
+ break;
memset(DataTransferBuffer, 0, DataTransferLength);
}
else if (DataTransferLength < 0)
{
DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
-DataTransferLength, &DataTransferBufferDMA);
- if (DataTransferBuffer == NULL) {
- ErrorCode = -ENOMEM;
- goto Failure1;
- }
+ if (DataTransferBuffer == NULL)
+ break;
if (copy_from_user(DataTransferBuffer,
UserCommand.DataTransferBuffer,
-DataTransferLength)) {
ErrorCode = -EFAULT;
- goto Failure1;
+ break;
}
}
if (CommandOpcode == DAC960_V1_DCDB)
@@ -6825,8 +6842,7 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
if (DCDB_IOBUF != NULL)
pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T),
DCDB_IOBUF, DCDB_IOBUFDMA);
- Failure1a:
- return ErrorCode;
+ break;
}
case DAC960_IOCTL_V2_EXECUTE_COMMAND:
{
@@ -6844,32 +6860,43 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
dma_addr_t DataTransferBufferDMA;
unsigned char *RequestSenseBuffer = NULL;
dma_addr_t RequestSenseBufferDMA;
- if (UserSpaceUserCommand == NULL) return -EINVAL;
+
+ ErrorCode = -EINVAL;
+ if (UserSpaceUserCommand == NULL)
+ break;
if (copy_from_user(&UserCommand, UserSpaceUserCommand,
sizeof(DAC960_V2_UserCommand_T))) {
ErrorCode = -EFAULT;
- goto Failure2a;
+ break;
}
+ ErrorCode = -ENXIO;
ControllerNumber = UserCommand.ControllerNumber;
if (ControllerNumber < 0 ||
ControllerNumber > DAC960_ControllerCount - 1)
- return -ENXIO;
+ break;
Controller = DAC960_Controllers[ControllerNumber];
- if (Controller == NULL) return -ENXIO;
- if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL;
+ if (Controller == NULL)
+ break;
+ if (Controller->FirmwareType != DAC960_V2_Controller){
+ ErrorCode = -EINVAL;
+ break;
+ }
DataTransferLength = UserCommand.DataTransferLength;
+ ErrorCode = -ENOMEM;
if (DataTransferLength > 0)
{
DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
DataTransferLength, &DataTransferBufferDMA);
- if (DataTransferBuffer == NULL) return -ENOMEM;
+ if (DataTransferBuffer == NULL)
+ break;
memset(DataTransferBuffer, 0, DataTransferLength);
}
else if (DataTransferLength < 0)
{
DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
-DataTransferLength, &DataTransferBufferDMA);
- if (DataTransferBuffer == NULL) return -ENOMEM;
+ if (DataTransferBuffer == NULL)
+ break;
if (copy_from_user(DataTransferBuffer,
UserCommand.DataTransferBuffer,
-DataTransferLength)) {
@@ -6979,8 +7006,7 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
if (RequestSenseBuffer != NULL)
pci_free_consistent(Controller->PCIDevice, RequestSenseLength,
RequestSenseBuffer, RequestSenseBufferDMA);
- Failure2a:
- return ErrorCode;
+ break;
}
case DAC960_IOCTL_V2_GET_HEALTH_STATUS:
{
@@ -6990,21 +7016,33 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer;
DAC960_Controller_T *Controller;
int ControllerNumber;
- if (UserSpaceGetHealthStatus == NULL) return -EINVAL;
+ if (UserSpaceGetHealthStatus == NULL) {
+ ErrorCode = -EINVAL;
+ break;
+ }
if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus,
- sizeof(DAC960_V2_GetHealthStatus_T)))
- return -EFAULT;
+ sizeof(DAC960_V2_GetHealthStatus_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ ErrorCode = -ENXIO;
ControllerNumber = GetHealthStatus.ControllerNumber;
if (ControllerNumber < 0 ||
ControllerNumber > DAC960_ControllerCount - 1)
- return -ENXIO;
+ break;
Controller = DAC960_Controllers[ControllerNumber];
- if (Controller == NULL) return -ENXIO;
- if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL;
+ if (Controller == NULL)
+ break;
+ if (Controller->FirmwareType != DAC960_V2_Controller) {
+ ErrorCode = -EINVAL;
+ break;
+ }
if (copy_from_user(&HealthStatusBuffer,
GetHealthStatus.HealthStatusBuffer,
- sizeof(DAC960_V2_HealthStatusBuffer_T)))
- return -EFAULT;
+ sizeof(DAC960_V2_HealthStatusBuffer_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
while (Controller->V2.HealthStatusBuffer->StatusChangeCounter
== HealthStatusBuffer.StatusChangeCounter &&
Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
@@ -7012,21 +7050,28 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
{
interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue,
DAC960_MonitoringTimerInterval);
- if (signal_pending(current)) return -EINTR;
+ if (signal_pending(current)) {
+ ErrorCode = -EINTR;
+ break;
+ }
}
if (copy_to_user(GetHealthStatus.HealthStatusBuffer,
Controller->V2.HealthStatusBuffer,
sizeof(DAC960_V2_HealthStatusBuffer_T)))
- return -EFAULT;
- return 0;
+ ErrorCode = -EFAULT;
+ else
+ ErrorCode = 0;
}
+ default:
+ ErrorCode = -ENOTTY;
}
- return -EINVAL;
+ unlock_kernel();
+ return ErrorCode;
}
static const struct file_operations DAC960_gam_fops = {
.owner = THIS_MODULE,
- .ioctl = DAC960_gam_ioctl
+ .unlocked_ioctl = DAC960_gam_ioctl
};
static struct miscdevice DAC960_gam_dev = {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 41f818be2f7e..2f1746295d06 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1003,7 +1003,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
* Enough people have their dip switches set backwards to
* warrant a loud message for this special case.
*/
- aoemajor = be16_to_cpu(get_unaligned(&h->major));
+ aoemajor = get_unaligned_be16(&h->major);
if (aoemajor == 0xfff) {
printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
"Check shelf dip switches.\n");
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 8b9549ab4a4e..27455ee1e9da 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -146,6 +146,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
#include <linux/mtio.h>
#include <linux/device.h>
#include <linux/sched.h> /* current, TASK_*, schedule_timeout() */
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -189,8 +190,7 @@ module_param_array(drive3, int, NULL, 0);
#define ATAPI_LOG_SENSE 0x4d
static int pt_open(struct inode *inode, struct file *file);
-static int pt_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
+static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
static int pt_release(struct inode *inode, struct file *file);
static ssize_t pt_read(struct file *filp, char __user *buf,
size_t count, loff_t * ppos);
@@ -236,7 +236,7 @@ static const struct file_operations pt_fops = {
.owner = THIS_MODULE,
.read = pt_read,
.write = pt_write,
- .ioctl = pt_ioctl,
+ .unlocked_ioctl = pt_ioctl,
.open = pt_open,
.release = pt_release,
};
@@ -685,8 +685,7 @@ out:
return err;
}
-static int pt_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct pt_unit *tape = file->private_data;
struct mtop __user *p = (void __user *)arg;
@@ -700,23 +699,26 @@ static int pt_ioctl(struct inode *inode, struct file *file,
switch (mtop.mt_op) {
case MTREW:
+ lock_kernel();
pt_rewind(tape);
+ unlock_kernel();
return 0;
case MTWEOF:
+ lock_kernel();
pt_write_fm(tape);
+ unlock_kernel();
return 0;
default:
- printk("%s: Unimplemented mt_op %d\n", tape->name,
+ /* FIXME: rate limit ?? */
+ printk(KERN_DEBUG "%s: Unimplemented mt_op %d\n", tape->name,
mtop.mt_op);
return -EINVAL;
}
default:
- printk("%s: Unimplemented ioctl 0x%x\n", tape->name, cmd);
- return -EINVAL;
-
+ return -ENOTTY;
}
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 3ba1df93e9e3..45bee918c46a 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -49,6 +49,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
+#include <linux/smp_lock.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/file.h>
@@ -2079,7 +2080,6 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
unsigned char buf[64];
int ret;
- memset(buf, 0, sizeof(buf));
init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
cgc.sense = &sense;
cgc.buflen = pd->mode_offset + 12;
@@ -2126,7 +2126,6 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
unsigned char *cap_buf;
int ret, offset;
- memset(buf, 0, sizeof(buf));
cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
cgc.sense = &sense;
@@ -2633,11 +2632,12 @@ end_io:
-static int pkt_merge_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *bvec)
+static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+ struct bio_vec *bvec)
{
struct pktcdvd_device *pd = q->queuedata;
- sector_t zone = ZONE(bio->bi_sector, pd);
- int used = ((bio->bi_sector - zone) << 9) + bio->bi_size;
+ sector_t zone = ZONE(bmd->bi_sector, pd);
+ int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
int remaining = (pd->settings.size << 9) - used;
int remaining2;
@@ -2645,7 +2645,7 @@ static int pkt_merge_bvec(struct request_queue *q, struct bio *bio, struct bio_v
* A bio <= PAGE_SIZE must be allowed. If it crosses a packet
* boundary, pkt_make_request() will split the bio.
*/
- remaining2 = PAGE_SIZE - bio->bi_size;
+ remaining2 = PAGE_SIZE - bmd->bi_size;
remaining = max(remaining, remaining2);
BUG_ON(remaining < 0);
@@ -2796,9 +2796,14 @@ out_mem:
return ret;
}
-static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long pkt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data;
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct pktcdvd_device *pd;
+ long ret;
+
+ lock_kernel();
+ pd = inode->i_bdev->bd_disk->private_data;
VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, imajor(inode), iminor(inode));
@@ -2811,7 +2816,8 @@ static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
case CDROM_LAST_WRITTEN:
case CDROM_SEND_PACKET:
case SCSI_IOCTL_SEND_COMMAND:
- return blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
+ ret = blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
+ break;
case CDROMEJECT:
/*
@@ -2820,14 +2826,15 @@ static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
*/
if (pd->refcnt == 1)
pkt_lock_door(pd, 0);
- return blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
+ ret = blkdev_ioctl(pd->bdev->bd_inode, file, cmd, arg);
+ break;
default:
VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd);
- return -ENOTTY;
+ ret = -ENOTTY;
}
-
- return 0;
+ unlock_kernel();
+ return ret;
}
static int pkt_media_changed(struct gendisk *disk)
@@ -2849,7 +2856,7 @@ static struct block_device_operations pktcdvd_ops = {
.owner = THIS_MODULE,
.open = pkt_open,
.release = pkt_close,
- .ioctl = pkt_ioctl,
+ .unlocked_ioctl = pkt_ioctl,
.media_changed = pkt_media_changed,
};
@@ -3014,7 +3021,8 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
mutex_unlock(&ctl_mutex);
}
-static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long pkt_ctl_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct pkt_ctrl_command ctrl_cmd;
@@ -3031,16 +3039,22 @@ static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cm
case PKT_CTRL_CMD_SETUP:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ lock_kernel();
ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
+ unlock_kernel();
break;
case PKT_CTRL_CMD_TEARDOWN:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ lock_kernel();
ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
+ unlock_kernel();
break;
case PKT_CTRL_CMD_STATUS:
+ lock_kernel();
pkt_get_status(&ctrl_cmd);
+ unlock_kernel();
break;
default:
return -ENOTTY;
@@ -3053,7 +3067,7 @@ static int pkt_ctl_ioctl(struct inode *inode, struct file *file, unsigned int cm
static const struct file_operations pkt_ctl_fops = {
- .ioctl = pkt_ctl_ioctl,
+ .unlocked_ioctl = pkt_ctl_ioctl,
.owner = THIS_MODULE,
};
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f2fff5799ddf..9ae05c584234 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -38,6 +38,7 @@
#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
+#include <linux/cdrom.h>
#include <linux/module.h>
#include <xen/xenbus.h>
@@ -153,6 +154,40 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
return 0;
}
+int blkif_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument)
+{
+ struct blkfront_info *info =
+ inode->i_bdev->bd_disk->private_data;
+ int i;
+
+ dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
+ command, (long)argument);
+
+ switch (command) {
+ case CDROMMULTISESSION:
+ dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+ if (put_user(0, (char __user *)(argument + i)))
+ return -EFAULT;
+ return 0;
+
+ case CDROM_GET_CAPABILITY: {
+ struct gendisk *gd = info->gd;
+ if (gd->flags & GENHD_FL_CD)
+ return 0;
+ return -EINVAL;
+ }
+
+ default:
+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+ command);*/
+ return -EINVAL; /* same return as native Linux */
+ }
+
+ return 0;
+}
+
/*
* blkif_queue_request
*
@@ -324,6 +359,9 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
+ /* Make sure we don't use bounce buffers. */
+ blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+
gd->queue = rq;
return 0;
@@ -546,7 +584,7 @@ static int setup_blkring(struct xenbus_device *dev,
info->ring_ref = GRANT_INVALID_REF;
- sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
+ sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
if (!sring) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
return -ENOMEM;
@@ -703,7 +741,8 @@ static int blkif_recover(struct blkfront_info *info)
int j;
/* Stage 1: Make a safe copy of the shadow state. */
- copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
+ copy = kmalloc(sizeof(info->shadow),
+ GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
if (!copy)
return -ENOMEM;
memcpy(copy, info->shadow, sizeof(info->shadow));
@@ -959,7 +998,7 @@ static int blkif_release(struct inode *inode, struct file *filep)
struct xenbus_device *dev = info->xbdev;
enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
- if (state == XenbusStateClosing)
+ if (state == XenbusStateClosing && info->is_ready)
blkfront_closing(dev);
}
return 0;
@@ -971,6 +1010,7 @@ static struct block_device_operations xlvbd_block_fops =
.open = blkif_open,
.release = blkif_release,
.getgeo = blkif_getgeo,
+ .ioctl = blkif_ioctl,
};
@@ -1006,7 +1046,7 @@ static int __init xlblk_init(void)
module_init(xlblk_init);
-static void xlblk_exit(void)
+static void __exit xlblk_exit(void)
{
return xenbus_unregister_driver(&blkfront);
}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 69f26eb6415b..a5da35632651 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -461,37 +461,27 @@ int cdrom_get_media_event(struct cdrom_device_info *cdi,
struct media_event_desc *med)
{
struct packet_command cgc;
- unsigned char *buffer;
- struct event_header *eh;
- int ret = 1;
-
- buffer = kmalloc(8, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
+ unsigned char buffer[8];
+ struct event_header *eh = (struct event_header *) buffer;
- eh = (struct event_header *)buffer;
-
- init_cdrom_command(&cgc, buffer, 8, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION;
cgc.cmd[1] = 1; /* IMMED */
cgc.cmd[4] = 1 << 4; /* media event */
- cgc.cmd[8] = 8;
+ cgc.cmd[8] = sizeof(buffer);
cgc.quiet = 1;
if (cdi->ops->generic_packet(cdi, &cgc))
- goto err;
+ return 1;
if (be16_to_cpu(eh->data_len) < sizeof(*med))
- goto err;
+ return 1;
if (eh->nea || eh->notification_class != 0x4)
- goto err;
+ return 1;
- memcpy(med, buffer + sizeof(*eh), sizeof(*med));
- ret = 0;
-err:
- kfree(buffer);
- return ret;
+ memcpy(med, &buffer[sizeof(*eh)], sizeof(*med));
+ return 0;
}
/*
@@ -501,82 +491,68 @@ err:
static int cdrom_mrw_probe_pc(struct cdrom_device_info *cdi)
{
struct packet_command cgc;
- char *buffer;
- int ret = 1;
-
- buffer = kmalloc(16, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
+ char buffer[16];
- init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
cgc.timeout = HZ;
cgc.quiet = 1;
if (!cdrom_mode_sense(cdi, &cgc, MRW_MODE_PC, 0)) {
cdi->mrw_mode_page = MRW_MODE_PC;
- ret = 0;
+ return 0;
} else if (!cdrom_mode_sense(cdi, &cgc, MRW_MODE_PC_PRE1, 0)) {
cdi->mrw_mode_page = MRW_MODE_PC_PRE1;
- ret = 0;
+ return 0;
}
- kfree(buffer);
- return ret;
+
+ return 1;
}
static int cdrom_is_mrw(struct cdrom_device_info *cdi, int *write)
{
struct packet_command cgc;
struct mrw_feature_desc *mfd;
- unsigned char *buffer;
+ unsigned char buffer[16];
int ret;
*write = 0;
- buffer = kmalloc(16, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
- init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
cgc.cmd[3] = CDF_MRW;
- cgc.cmd[8] = 16;
+ cgc.cmd[8] = sizeof(buffer);
cgc.quiet = 1;
if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
mfd = (struct mrw_feature_desc *)&buffer[sizeof(struct feature_header)];
- if (be16_to_cpu(mfd->feature_code) != CDF_MRW) {
- ret = 1;
- goto err;
- }
+ if (be16_to_cpu(mfd->feature_code) != CDF_MRW)
+ return 1;
*write = mfd->write;
if ((ret = cdrom_mrw_probe_pc(cdi))) {
*write = 0;
+ return ret;
}
-err:
- kfree(buffer);
- return ret;
+
+ return 0;
}
static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont)
{
struct packet_command cgc;
- unsigned char *buffer;
+ unsigned char buffer[12];
int ret;
printk(KERN_INFO "cdrom: %sstarting format\n", cont ? "Re" : "");
- buffer = kmalloc(12, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
/*
* FmtData bit set (bit 4), format type is 1
*/
- init_cdrom_command(&cgc, buffer, 12, CGC_DATA_WRITE);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_WRITE);
cgc.cmd[0] = GPCMD_FORMAT_UNIT;
cgc.cmd[1] = (1 << 4) | 1;
@@ -603,7 +579,6 @@ static int cdrom_mrw_bgformat(struct cdrom_device_info *cdi, int cont)
if (ret)
printk(KERN_INFO "cdrom: bgformat failed\n");
- kfree(buffer);
return ret;
}
@@ -663,17 +638,16 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
{
struct packet_command cgc;
struct mode_page_header *mph;
- char *buffer;
+ char buffer[16];
int ret, offset, size;
- buffer = kmalloc(16, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
- init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
+ cgc.buffer = buffer;
+ cgc.buflen = sizeof(buffer);
if ((ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0)))
- goto err;
+ return ret;
mph = (struct mode_page_header *) buffer;
offset = be16_to_cpu(mph->desc_length);
@@ -683,70 +657,55 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
cgc.buflen = size;
if ((ret = cdrom_mode_select(cdi, &cgc)))
- goto err;
+ return ret;
printk(KERN_INFO "cdrom: %s: mrw address space %s selected\n", cdi->name, mrw_address_space[space]);
- ret = 0;
-err:
- kfree(buffer);
- return ret;
+ return 0;
}
static int cdrom_get_random_writable(struct cdrom_device_info *cdi,
struct rwrt_feature_desc *rfd)
{
struct packet_command cgc;
- char *buffer;
+ char buffer[24];
int ret;
- buffer = kmalloc(24, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- init_cdrom_command(&cgc, buffer, 24, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_GET_CONFIGURATION; /* often 0x46 */
cgc.cmd[3] = CDF_RWRT; /* often 0x0020 */
- cgc.cmd[8] = 24; /* often 0x18 */
+ cgc.cmd[8] = sizeof(buffer); /* often 0x18 */
cgc.quiet = 1;
if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
memcpy(rfd, &buffer[sizeof(struct feature_header)], sizeof (*rfd));
- ret = 0;
-err:
- kfree(buffer);
- return ret;
+ return 0;
}
static int cdrom_has_defect_mgt(struct cdrom_device_info *cdi)
{
struct packet_command cgc;
- char *buffer;
+ char buffer[16];
__be16 *feature_code;
int ret;
- buffer = kmalloc(16, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
cgc.cmd[3] = CDF_HWDM;
- cgc.cmd[8] = 16;
+ cgc.cmd[8] = sizeof(buffer);
cgc.quiet = 1;
if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
feature_code = (__be16 *) &buffer[sizeof(struct feature_header)];
if (be16_to_cpu(*feature_code) == CDF_HWDM)
- ret = 0;
-err:
- kfree(buffer);
- return ret;
+ return 0;
+
+ return 1;
}
@@ -837,14 +796,10 @@ static int cdrom_mrw_open_write(struct cdrom_device_info *cdi)
static int mo_open_write(struct cdrom_device_info *cdi)
{
struct packet_command cgc;
- char *buffer;
+ char buffer[255];
int ret;
- buffer = kmalloc(255, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- init_cdrom_command(&cgc, buffer, 4, CGC_DATA_READ);
+ init_cdrom_command(&cgc, &buffer, 4, CGC_DATA_READ);
cgc.quiet = 1;
/*
@@ -861,15 +816,10 @@ static int mo_open_write(struct cdrom_device_info *cdi)
}
/* drive gave us no info, let the user go ahead */
- if (ret) {
- ret = 0;
- goto err;
- }
+ if (ret)
+ return 0;
- ret = buffer[3] & 0x80;
-err:
- kfree(buffer);
- return ret;
+ return buffer[3] & 0x80;
}
static int cdrom_ram_open_write(struct cdrom_device_info *cdi)
@@ -892,19 +842,15 @@ static int cdrom_ram_open_write(struct cdrom_device_info *cdi)
static void cdrom_mmc3_profile(struct cdrom_device_info *cdi)
{
struct packet_command cgc;
- char *buffer;
+ char buffer[32];
int ret, mmc3_profile;
- buffer = kmalloc(32, GFP_KERNEL);
- if (!buffer)
- return;
-
- init_cdrom_command(&cgc, buffer, 32, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
cgc.cmd[1] = 0;
cgc.cmd[2] = cgc.cmd[3] = 0; /* Starting Feature Number */
- cgc.cmd[8] = 32; /* Allocation Length */
+ cgc.cmd[8] = sizeof(buffer); /* Allocation Length */
cgc.quiet = 1;
if ((ret = cdi->ops->generic_packet(cdi, &cgc)))
@@ -913,7 +859,6 @@ static void cdrom_mmc3_profile(struct cdrom_device_info *cdi)
mmc3_profile = (buffer[6] << 8) | buffer[7];
cdi->mmc3_profile = mmc3_profile;
- kfree(buffer);
}
static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi)
@@ -1628,15 +1573,12 @@ static void setup_send_key(struct packet_command *cgc, unsigned agid, unsigned t
static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
{
int ret;
- u_char *buf;
+ u_char buf[20];
struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
- rpc_state_t *rpc_state;
-
- buf = kzalloc(20, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
+ rpc_state_t rpc_state;
+ memset(buf, 0, sizeof(buf));
init_cdrom_command(&cgc, buf, 0, CGC_DATA_READ);
switch (ai->type) {
@@ -1647,7 +1589,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
setup_report_key(&cgc, ai->lsa.agid, 0);
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
ai->lsa.agid = buf[7] >> 6;
/* Returning data, let host change state */
@@ -1658,7 +1600,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
setup_report_key(&cgc, ai->lsk.agid, 2);
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
copy_key(ai->lsk.key, &buf[4]);
/* Returning data, let host change state */
@@ -1669,7 +1611,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
setup_report_key(&cgc, ai->lsc.agid, 1);
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
copy_chal(ai->lsc.chal, &buf[4]);
/* Returning data, let host change state */
@@ -1686,7 +1628,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
cgc.cmd[2] = ai->lstk.lba >> 24;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
ai->lstk.cpm = (buf[4] >> 7) & 1;
ai->lstk.cp_sec = (buf[4] >> 6) & 1;
@@ -1700,7 +1642,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
setup_report_key(&cgc, ai->lsasf.agid, 5);
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
ai->lsasf.asf = buf[7] & 1;
break;
@@ -1713,7 +1655,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
copy_chal(&buf[4], ai->hsc.chal);
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
ai->type = DVD_LU_SEND_KEY1;
break;
@@ -1726,7 +1668,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
if ((ret = cdo->generic_packet(cdi, &cgc))) {
ai->type = DVD_AUTH_FAILURE;
- goto err;
+ return ret;
}
ai->type = DVD_AUTH_ESTABLISHED;
break;
@@ -1737,23 +1679,24 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
cdinfo(CD_DVD, "entering DVD_INVALIDATE_AGID\n");
setup_report_key(&cgc, ai->lsa.agid, 0x3f);
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
break;
/* Get region settings */
case DVD_LU_SEND_RPC_STATE:
cdinfo(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n");
setup_report_key(&cgc, 0, 8);
+ memset(&rpc_state, 0, sizeof(rpc_state_t));
+ cgc.buffer = (char *) &rpc_state;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
- rpc_state = (rpc_state_t *)buf;
- ai->lrpcs.type = rpc_state->type_code;
- ai->lrpcs.vra = rpc_state->vra;
- ai->lrpcs.ucca = rpc_state->ucca;
- ai->lrpcs.region_mask = rpc_state->region_mask;
- ai->lrpcs.rpc_scheme = rpc_state->rpc_scheme;
+ ai->lrpcs.type = rpc_state.type_code;
+ ai->lrpcs.vra = rpc_state.vra;
+ ai->lrpcs.ucca = rpc_state.ucca;
+ ai->lrpcs.region_mask = rpc_state.region_mask;
+ ai->lrpcs.rpc_scheme = rpc_state.rpc_scheme;
break;
/* Set region settings */
@@ -1764,23 +1707,20 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
buf[4] = ai->hrpcs.pdrc;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
break;
default:
cdinfo(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type);
- ret = -ENOTTY;
- goto err;
+ return -ENOTTY;
}
- ret = 0;
-err:
- kfree(buf);
- return ret;
+
+ return 0;
}
static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
{
- unsigned char *buf, *base;
+ unsigned char buf[21], *base;
struct dvd_layer *layer;
struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
@@ -1789,11 +1729,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
if (layer_num >= DVD_LAYERS)
return -EINVAL;
- buf = kmalloc(21, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- init_cdrom_command(&cgc, buf, 21, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
cgc.cmd[6] = layer_num;
cgc.cmd[7] = s->type;
@@ -1805,7 +1741,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
cgc.quiet = 1;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
base = &buf[4];
layer = &s->physical.layer[layer_num];
@@ -1829,24 +1765,17 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
layer->end_sector_l0 = base[13] << 16 | base[14] << 8 | base[15];
layer->bca = base[16] >> 7;
- ret = 0;
-err:
- kfree(buf);
- return ret;
+ return 0;
}
static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
{
int ret;
- u_char *buf;
+ u_char buf[8];
struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
- buf = kmalloc(8, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- init_cdrom_command(&cgc, buf, 8, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
cgc.cmd[6] = s->copyright.layer_num;
cgc.cmd[7] = s->type;
@@ -1854,15 +1783,12 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
cgc.cmd[9] = cgc.buflen & 0xff;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
s->copyright.cpst = buf[4];
s->copyright.rmi = buf[5];
- ret = 0;
-err:
- kfree(buf);
- return ret;
+ return 0;
}
static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s)
@@ -1894,33 +1820,26 @@ static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s)
static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s)
{
int ret;
- u_char *buf;
+ u_char buf[4 + 188];
struct packet_command cgc;
struct cdrom_device_ops *cdo = cdi->ops;
- buf = kmalloc(4 + 188, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- init_cdrom_command(&cgc, buf, 4 + 188, CGC_DATA_READ);
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
cgc.cmd[7] = s->type;
cgc.cmd[9] = cgc.buflen & 0xff;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
s->bca.len = buf[0] << 8 | buf[1];
if (s->bca.len < 12 || s->bca.len > 188) {
cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len);
- ret = -EIO;
- goto err;
+ return -EIO;
}
memcpy(s->bca.value, &buf[4], s->bca.len);
- ret = 0;
-err:
- kfree(buf);
- return ret;
+
+ return 0;
}
static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s)
@@ -2020,13 +1939,9 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
{
struct cdrom_device_ops *cdo = cdi->ops;
struct packet_command cgc;
- char *buffer;
+ char buffer[32];
int ret;
- buffer = kmalloc(32, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
init_cdrom_command(&cgc, buffer, 16, CGC_DATA_READ);
cgc.cmd[0] = GPCMD_READ_SUBCHANNEL;
cgc.cmd[1] = 2; /* MSF addressing */
@@ -2035,7 +1950,7 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
cgc.cmd[8] = 16;
if ((ret = cdo->generic_packet(cdi, &cgc)))
- goto err;
+ return ret;
subchnl->cdsc_audiostatus = cgc.buffer[1];
subchnl->cdsc_format = CDROM_MSF;
@@ -2050,10 +1965,7 @@ static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
subchnl->cdsc_absaddr.msf.second = cgc.buffer[10];
subchnl->cdsc_absaddr.msf.frame = cgc.buffer[11];
- ret = 0;
-err:
- kfree(buffer);
- return ret;
+ return 0;
}
/*
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 10748240cb2f..6a866d7c8ae5 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -50,17 +50,19 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
/**
* linear_mergeable_bvec -- tell bio layer if two requests can be merged
* @q: request queue
- * @bio: the buffer head that's been built up so far
+ * @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can take at this offset
*/
-static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int linear_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
dev_info_t *dev0;
- unsigned long maxsectors, bio_sectors = bio->bi_size >> 9;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
dev0 = which_dev(mddev, sector);
maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 914c04ddec7c..bcbb82594a19 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -241,18 +241,20 @@ static int create_strip_zones (mddev_t *mddev)
/**
* raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
* @q: request queue
- * @bio: the buffer head that's been built up so far
+ * @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can accept at this offset
*/
-static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int raid0_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a71277b640ab..22bb2b1b886d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -439,26 +439,27 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
/**
* raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
* @q: request queue
- * @bio: the buffer head that's been built up so far
+ * @bvm: properties of new bio
* @biovec: the request that could be merged to it.
*
* Return amount of bytes we can accept at this offset
* If near_copies == raid_disk, there are no striping issues,
* but in that case, the function isn't called at all.
*/
-static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio,
- struct bio_vec *bio_vec)
+static int raid10_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
- if (max <= bio_vec->bv_len && bio_sectors == 0)
- return bio_vec->bv_len;
+ if (max <= biovec->bv_len && bio_sectors == 0)
+ return biovec->bv_len;
else
return max;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3b27df52456b..9ce7154845c6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3314,15 +3314,17 @@ static int raid5_congested(void *data, int bits)
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
*/
-static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec)
+static int raid5_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
{
mddev_t *mddev = q->queuedata;
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
int max;
unsigned int chunk_sectors = mddev->chunk_size >> 9;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bvm->bi_size >> 9;
- if (bio_data_dir(bio) == WRITE)
+ if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d26f69b0184f..ef671d1a3bf0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1324,7 +1324,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
goto fail;
}
- txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
+ txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
if (!txs) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err, "allocating tx ring page");
@@ -1340,7 +1340,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
}
info->tx_ring_ref = err;
- rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
+ rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
if (!rxs) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err, "allocating rx ring page");
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index ea0edd1b2e76..fe694f0ee19a 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -182,8 +182,9 @@ static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp,
int tablesize);
static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count,
Sg_request * srp);
-static ssize_t sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
- int blocking, int read_only, Sg_request ** o_srp);
+static ssize_t sg_new_write(Sg_fd *sfp, struct file *file,
+ const char __user *buf, size_t count, int blocking,
+ int read_only, Sg_request **o_srp);
static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
unsigned char *cmnd, int timeout, int blocking);
static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
@@ -204,7 +205,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
static Sg_request *sg_add_request(Sg_fd * sfp);
static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
static int sg_res_in_use(Sg_fd * sfp);
-static int sg_allow_access(unsigned char opcode, char dev_type);
static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
static Sg_device *sg_get_dev(int dev);
#ifdef CONFIG_SCSI_PROC_FS
@@ -544,7 +544,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
return -EFAULT;
blocking = !(filp->f_flags & O_NONBLOCK);
if (old_hdr.reply_len < 0)
- return sg_new_write(sfp, buf, count, blocking, 0, NULL);
+ return sg_new_write(sfp, filp, buf, count, blocking, 0, NULL);
if (count < (SZ_SG_HEADER + 6))
return -EIO; /* The minimum scsi command length is 6 bytes. */
@@ -621,8 +621,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
}
static ssize_t
-sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
- int blocking, int read_only, Sg_request ** o_srp)
+sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
+ size_t count, int blocking, int read_only,
+ Sg_request **o_srp)
{
int k;
Sg_request *srp;
@@ -678,8 +679,7 @@ sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
sg_remove_request(sfp, srp);
return -EFAULT;
}
- if (read_only &&
- (!sg_allow_access(cmnd[0], sfp->parentdp->device->type))) {
+ if (read_only && !blk_verify_command(file, cmnd)) {
sg_remove_request(sfp, srp);
return -EPERM;
}
@@ -799,7 +799,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
if (!access_ok(VERIFY_WRITE, p, SZ_SG_IO_HDR))
return -EFAULT;
result =
- sg_new_write(sfp, p, SZ_SG_IO_HDR,
+ sg_new_write(sfp, filp, p, SZ_SG_IO_HDR,
blocking, read_only, &srp);
if (result < 0)
return result;
@@ -1048,7 +1048,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
if (copy_from_user(&opcode, siocp->data, 1))
return -EFAULT;
- if (!sg_allow_access(opcode, sdp->device->type))
+ if (!blk_verify_command(filp, &opcode))
return -EPERM;
}
return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p);
@@ -2502,30 +2502,6 @@ sg_page_free(struct page *page, int size)
__free_pages(page, order);
}
-#ifndef MAINTENANCE_IN_CMD
-#define MAINTENANCE_IN_CMD 0xa3
-#endif
-
-static unsigned char allow_ops[] = { TEST_UNIT_READY, REQUEST_SENSE,
- INQUIRY, READ_CAPACITY, READ_BUFFER, READ_6, READ_10, READ_12,
- READ_16, MODE_SENSE, MODE_SENSE_10, LOG_SENSE, REPORT_LUNS,
- SERVICE_ACTION_IN, RECEIVE_DIAGNOSTIC, READ_LONG, MAINTENANCE_IN_CMD
-};
-
-static int
-sg_allow_access(unsigned char opcode, char dev_type)
-{
- int k;
-
- if (TYPE_SCANNER == dev_type) /* TYPE_ROM maybe burner */
- return 1;
- for (k = 0; k < sizeof (allow_ops); ++k) {
- if (opcode == allow_ops[k])
- return 1;
- }
- return 0;
-}
-
#ifdef CONFIG_SCSI_PROC_FS
static int
sg_idr_max_id(int id, void *p, void *data)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index c82df8bd4d89..27f5bfd1def3 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -673,24 +673,20 @@ fail:
static void get_sectorsize(struct scsi_cd *cd)
{
unsigned char cmd[10];
- unsigned char *buffer;
+ unsigned char buffer[8];
int the_result, retries = 3;
int sector_size;
struct request_queue *queue;
- buffer = kmalloc(512, GFP_KERNEL | GFP_DMA);
- if (!buffer)
- goto Enomem;
-
do {
cmd[0] = READ_CAPACITY;
memset((void *) &cmd[1], 0, 9);
- memset(buffer, 0, 8);
+ memset(buffer, 0, sizeof(buffer));
/* Do the command and wait.. */
the_result = scsi_execute_req(cd->device, cmd, DMA_FROM_DEVICE,
- buffer, 8, NULL, SR_TIMEOUT,
- MAX_RETRIES);
+ buffer, sizeof(buffer), NULL,
+ SR_TIMEOUT, MAX_RETRIES);
retries--;
@@ -745,14 +741,8 @@ static void get_sectorsize(struct scsi_cd *cd)
queue = cd->device->request_queue;
blk_queue_hardsect_size(queue, sector_size);
-out:
- kfree(buffer);
- return;
-Enomem:
- cd->capacity = 0x1fffff;
- cd->device->sector_size = 2048; /* A guess, just in case */
- goto out;
+ return;
}
static void get_capabilities(struct scsi_cd *cd)
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 0f86b0ff7879..9678b3e98c63 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -117,7 +117,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev,
char *path;
va_start(ap, pathfmt);
- path = kvasprintf(GFP_KERNEL, pathfmt, ap);
+ path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
va_end(ap);
if (!path) {
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 227d53b12a5c..7f2f91c0e11d 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -283,9 +283,9 @@ static char *join(const char *dir, const char *name)
char *buffer;
if (strlen(name) == 0)
- buffer = kasprintf(GFP_KERNEL, "%s", dir);
+ buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
else
- buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
+ buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
}
@@ -297,7 +297,7 @@ static char **split(char *strings, unsigned int len, unsigned int *num)
*num = count_strings(strings, len);
/* Transfer to one big alloc for easy freeing. */
- ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
+ ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
if (!ret) {
kfree(strings);
return ERR_PTR(-ENOMEM);
@@ -751,7 +751,7 @@ static int process_msg(void)
}
- msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH);
if (msg == NULL) {
err = -ENOMEM;
goto out;
@@ -763,7 +763,7 @@ static int process_msg(void)
goto out;
}
- body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
+ body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
if (body == NULL) {
kfree(msg);
err = -ENOMEM;
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11bd4da1..277b079dec9e 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,7 @@ else
obj-y += no-block.o
endif
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
new file mode 100644
index 000000000000..63e2ee63058d
--- /dev/null
+++ b/fs/bio-integrity.c
@@ -0,0 +1,719 @@
+/*
+ * bio-integrity.c - bio data integrity extensions
+ *
+ * Copyright (C) 2007, 2008 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/bio.h>
+#include <linux/workqueue.h>
+
+static struct kmem_cache *bio_integrity_slab __read_mostly;
+static struct workqueue_struct *kintegrityd_wq;
+
+/**
+ * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
+ * @bio: bio to attach integrity metadata to
+ * @gfp_mask: Memory allocation mask
+ * @nr_vecs: Number of integrity metadata scatter-gather elements
+ * @bs: bio_set to allocate from
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata. nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs,
+ struct bio_set *bs)
+{
+ struct bio_integrity_payload *bip;
+ struct bio_vec *iv;
+ unsigned long idx;
+
+ BUG_ON(bio == NULL);
+
+ bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+ if (unlikely(bip == NULL)) {
+ printk(KERN_ERR "%s: could not alloc bip\n", __func__);
+ return NULL;
+ }
+
+ memset(bip, 0, sizeof(*bip));
+
+ iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
+ if (unlikely(iv == NULL)) {
+ printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
+ mempool_free(bip, bs->bio_integrity_pool);
+ return NULL;
+ }
+
+ bip->bip_pool = idx;
+ bip->bip_vec = iv;
+ bip->bip_bio = bio;
+ bio->bi_integrity = bip;
+
+ return bip;
+}
+EXPORT_SYMBOL(bio_integrity_alloc_bioset);
+
+/**
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * @bio: bio to attach integrity metadata to
+ * @gfp_mask: Memory allocation mask
+ * @nr_vecs: Number of integrity metadata scatter-gather elements
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata. nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs)
+{
+ return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
+}
+EXPORT_SYMBOL(bio_integrity_alloc);
+
+/**
+ * bio_integrity_free - Free bio integrity payload
+ * @bio: bio containing bip to be freed
+ * @bs: bio_set this bio was allocated from
+ *
+ * Description: Used to free the integrity portion of a bio. Usually
+ * called from bio_free().
+ */
+void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+
+ BUG_ON(bip == NULL);
+
+ /* A cloned bio doesn't own the integrity metadata */
+ if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
+ kfree(bip->bip_buf);
+
+ mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
+ mempool_free(bip, bs->bio_integrity_pool);
+
+ bio->bi_integrity = NULL;
+}
+EXPORT_SYMBOL(bio_integrity_free);
+
+/**
+ * bio_integrity_add_page - Attach integrity metadata
+ * @bio: bio to update
+ * @page: page containing integrity metadata
+ * @len: number of bytes of integrity metadata in page
+ * @offset: start offset within page
+ *
+ * Description: Attach a page containing integrity metadata to bio.
+ */
+int bio_integrity_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct bio_vec *iv;
+
+ if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
+ printk(KERN_ERR "%s: bip_vec full\n", __func__);
+ return 0;
+ }
+
+ iv = bip_vec_idx(bip, bip->bip_vcnt);
+ BUG_ON(iv == NULL);
+ BUG_ON(iv->bv_page != NULL);
+
+ iv->bv_page = page;
+ iv->bv_len = len;
+ iv->bv_offset = offset;
+ bip->bip_vcnt++;
+
+ return len;
+}
+EXPORT_SYMBOL(bio_integrity_add_page);
+
+/**
+ * bio_integrity_enabled - Check whether integrity can be passed
+ * @bio: bio to check
+ *
+ * Description: Determines whether bio_integrity_prep() can be called
+ * on this bio or not. bio data direction and target device must be
+ * set prior to calling. The functions honors the write_generate and
+ * read_verify flags in sysfs.
+ */
+int bio_integrity_enabled(struct bio *bio)
+{
+ /* Already protected? */
+ if (bio_integrity(bio))
+ return 0;
+
+ return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
+}
+EXPORT_SYMBOL(bio_integrity_enabled);
+
+/**
+ * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
+ * @bi: blk_integrity profile for device
+ * @sectors: Number of 512 sectors to convert
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the hardware
+ * sector size of the storage device. Convert the block layer sectors
+ * to physical sectors.
+ */
+static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ /* At this point there are only 512b or 4096b DIF/EPP devices */
+ if (bi->sector_size == 4096)
+ return sectors >>= 3;
+
+ return sectors;
+}
+
+/**
+ * bio_integrity_tag_size - Retrieve integrity tag space
+ * @bio: bio to inspect
+ *
+ * Description: Returns the maximum number of tag bytes that can be
+ * attached to this bio. Filesystems can use this to determine how
+ * much metadata to attach to an I/O.
+ */
+unsigned int bio_integrity_tag_size(struct bio *bio)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+
+ BUG_ON(bio->bi_size == 0);
+
+ return bi->tag_size * (bio->bi_size / bi->sector_size);
+}
+EXPORT_SYMBOL(bio_integrity_tag_size);
+
+int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ unsigned int nr_sectors;
+
+ BUG_ON(bip->bip_buf == NULL);
+
+ if (bi->tag_size == 0)
+ return -1;
+
+ nr_sectors = bio_integrity_hw_sectors(bi,
+ DIV_ROUND_UP(len, bi->tag_size));
+
+ if (nr_sectors * bi->tuple_size > bip->bip_size) {
+ printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
+ __func__, nr_sectors * bi->tuple_size, bip->bip_size);
+ return -1;
+ }
+
+ if (set)
+ bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
+ else
+ bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
+
+ return 0;
+}
+
+/**
+ * bio_integrity_set_tag - Attach a tag buffer to a bio
+ * @bio: bio to attach buffer to
+ * @tag_buf: Pointer to a buffer containing tag data
+ * @len: Length of the included buffer
+ *
+ * Description: Use this function to tag a bio by leveraging the extra
+ * space provided by devices formatted with integrity protection. The
+ * size of the integrity buffer must be <= to the size reported by
+ * bio_integrity_tag_size().
+ */
+int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
+{
+ BUG_ON(bio_data_dir(bio) != WRITE);
+
+ return bio_integrity_tag(bio, tag_buf, len, 1);
+}
+EXPORT_SYMBOL(bio_integrity_set_tag);
+
+/**
+ * bio_integrity_get_tag - Retrieve a tag buffer from a bio
+ * @bio: bio to retrieve buffer from
+ * @tag_buf: Pointer to a buffer for the tag data
+ * @len: Length of the target buffer
+ *
+ * Description: Use this function to retrieve the tag buffer from a
+ * completed I/O. The size of the integrity buffer must be <= to the
+ * size reported by bio_integrity_tag_size().
+ */
+int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
+{
+ BUG_ON(bio_data_dir(bio) != READ);
+
+ return bio_integrity_tag(bio, tag_buf, len, 0);
+}
+EXPORT_SYMBOL(bio_integrity_get_tag);
+
+/**
+ * bio_integrity_generate - Generate integrity metadata for a bio
+ * @bio: bio to generate integrity metadata for
+ *
+ * Description: Generates integrity metadata for a bio by calling the
+ * block device's generation callback function. The bio must have a
+ * bip attached with enough room to accommodate the generated
+ * integrity metadata.
+ */
+static void bio_integrity_generate(struct bio *bio)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity_exchg bix;
+ struct bio_vec *bv;
+ sector_t sector = bio->bi_sector;
+ unsigned int i, sectors, total;
+ void *prot_buf = bio->bi_integrity->bip_buf;
+
+ total = 0;
+ bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ bix.sector_size = bi->sector_size;
+
+ bio_for_each_segment(bv, bio, i) {
+ void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ bix.data_buf = kaddr + bv->bv_offset;
+ bix.data_size = bv->bv_len;
+ bix.prot_buf = prot_buf;
+ bix.sector = sector;
+
+ bi->generate_fn(&bix);
+
+ sectors = bv->bv_len / bi->sector_size;
+ sector += sectors;
+ prot_buf += sectors * bi->tuple_size;
+ total += sectors * bi->tuple_size;
+ BUG_ON(total > bio->bi_integrity->bip_size);
+
+ kunmap_atomic(kaddr, KM_USER0);
+ }
+}
+
+/**
+ * bio_integrity_prep - Prepare bio for integrity I/O
+ * @bio: bio to prepare
+ *
+ * Description: Allocates a buffer for integrity metadata, maps the
+ * pages and attaches them to a bio. The bio must have data
+ * direction, target device and start sector set priot to calling. In
+ * the WRITE case, integrity metadata will be generated using the
+ * block device's integrity function. In the READ case, the buffer
+ * will be prepared for DMA and a suitable end_io handler set up.
+ */
+int bio_integrity_prep(struct bio *bio)
+{
+ struct bio_integrity_payload *bip;
+ struct blk_integrity *bi;
+ struct request_queue *q;
+ void *buf;
+ unsigned long start, end;
+ unsigned int len, nr_pages;
+ unsigned int bytes, offset, i;
+ unsigned int sectors;
+
+ bi = bdev_get_integrity(bio->bi_bdev);
+ q = bdev_get_queue(bio->bi_bdev);
+ BUG_ON(bi == NULL);
+ BUG_ON(bio_integrity(bio));
+
+ sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
+
+ /* Allocate kernel buffer for protection data */
+ len = sectors * blk_integrity_tuple_size(bi);
+ buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
+ if (unlikely(buf == NULL)) {
+ printk(KERN_ERR "could not allocate integrity buffer\n");
+ return -EIO;
+ }
+
+ end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = ((unsigned long) buf) >> PAGE_SHIFT;
+ nr_pages = end - start;
+
+ /* Allocate bio integrity payload and integrity vectors */
+ bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
+ if (unlikely(bip == NULL)) {
+ printk(KERN_ERR "could not allocate data integrity bioset\n");
+ kfree(buf);
+ return -EIO;
+ }
+
+ bip->bip_buf = buf;
+ bip->bip_size = len;
+ bip->bip_sector = bio->bi_sector;
+
+ /* Map it */
+ offset = offset_in_page(buf);
+ for (i = 0 ; i < nr_pages ; i++) {
+ int ret;
+ bytes = PAGE_SIZE - offset;
+
+ if (len <= 0)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ ret = bio_integrity_add_page(bio, virt_to_page(buf),
+ bytes, offset);
+
+ if (ret == 0)
+ return 0;
+
+ if (ret < bytes)
+ break;
+
+ buf += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+
+ /* Install custom I/O completion handler if read verify is enabled */
+ if (bio_data_dir(bio) == READ) {
+ bip->bip_end_io = bio->bi_end_io;
+ bio->bi_end_io = bio_integrity_endio;
+ }
+
+ /* Auto-generate integrity metadata if this is a write */
+ if (bio_data_dir(bio) == WRITE)
+ bio_integrity_generate(bio);
+
+ return 0;
+}
+EXPORT_SYMBOL(bio_integrity_prep);
+
+/**
+ * bio_integrity_verify - Verify integrity metadata for a bio
+ * @bio: bio to verify
+ *
+ * Description: This function is called to verify the integrity of a
+ * bio. The data in the bio io_vec is compared to the integrity
+ * metadata returned by the HBA.
+ */
+static int bio_integrity_verify(struct bio *bio)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity_exchg bix;
+ struct bio_vec *bv;
+ sector_t sector = bio->bi_integrity->bip_sector;
+ unsigned int i, sectors, total, ret;
+ void *prot_buf = bio->bi_integrity->bip_buf;
+
+ ret = total = 0;
+ bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ bix.sector_size = bi->sector_size;
+
+ bio_for_each_segment(bv, bio, i) {
+ void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ bix.data_buf = kaddr + bv->bv_offset;
+ bix.data_size = bv->bv_len;
+ bix.prot_buf = prot_buf;
+ bix.sector = sector;
+
+ ret = bi->verify_fn(&bix);
+
+ if (ret) {
+ kunmap_atomic(kaddr, KM_USER0);
+ break;
+ }
+
+ sectors = bv->bv_len / bi->sector_size;
+ sector += sectors;
+ prot_buf += sectors * bi->tuple_size;
+ total += sectors * bi->tuple_size;
+ BUG_ON(total > bio->bi_integrity->bip_size);
+
+ kunmap_atomic(kaddr, KM_USER0);
+ }
+
+ return ret;
+}
+
+/**
+ * bio_integrity_verify_fn - Integrity I/O completion worker
+ * @work: Work struct stored in bio to be verified
+ *
+ * Description: This workqueue function is called to complete a READ
+ * request. The function verifies the transferred integrity metadata
+ * and then calls the original bio end_io function.
+ */
+static void bio_integrity_verify_fn(struct work_struct *work)
+{
+ struct bio_integrity_payload *bip =
+ container_of(work, struct bio_integrity_payload, bip_work);
+ struct bio *bio = bip->bip_bio;
+ int error = bip->bip_error;
+
+ if (bio_integrity_verify(bio)) {
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ error = -EIO;
+ }
+
+ /* Restore original bio completion handler */
+ bio->bi_end_io = bip->bip_end_io;
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio, error);
+}
+
+/**
+ * bio_integrity_endio - Integrity I/O completion function
+ * @bio: Protected bio
+ * @error: Pointer to errno
+ *
+ * Description: Completion for integrity I/O
+ *
+ * Normally I/O completion is done in interrupt context. However,
+ * verifying I/O integrity is a time-consuming task which must be run
+ * in process context. This function postpones completion
+ * accordingly.
+ */
+void bio_integrity_endio(struct bio *bio, int error)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+
+ BUG_ON(bip->bip_bio != bio);
+
+ bip->bip_error = error;
+ INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+ queue_work(kintegrityd_wq, &bip->bip_work);
+}
+EXPORT_SYMBOL(bio_integrity_endio);
+
+/**
+ * bio_integrity_mark_head - Advance bip_vec skip bytes
+ * @bip: Integrity vector to advance
+ * @skip: Number of bytes to advance it
+ */
+void bio_integrity_mark_head(struct bio_integrity_payload *bip,
+ unsigned int skip)
+{
+ struct bio_vec *iv;
+ unsigned int i;
+
+ bip_for_each_vec(iv, bip, i) {
+ if (skip == 0) {
+ bip->bip_idx = i;
+ return;
+ } else if (skip >= iv->bv_len) {
+ skip -= iv->bv_len;
+ } else { /* skip < iv->bv_len) */
+ iv->bv_offset += skip;
+ iv->bv_len -= skip;
+ bip->bip_idx = i;
+ return;
+ }
+ }
+}
+
+/**
+ * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
+ * @bip: Integrity vector to truncate
+ * @len: New length of integrity vector
+ */
+void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
+ unsigned int len)
+{
+ struct bio_vec *iv;
+ unsigned int i;
+
+ bip_for_each_vec(iv, bip, i) {
+ if (len == 0) {
+ bip->bip_vcnt = i;
+ return;
+ } else if (len >= iv->bv_len) {
+ len -= iv->bv_len;
+ } else { /* len < iv->bv_len) */
+ iv->bv_len = len;
+ len = 0;
+ }
+ }
+}
+
+/**
+ * bio_integrity_advance - Advance integrity vector
+ * @bio: bio whose integrity vector to update
+ * @bytes_done: number of data bytes that have been completed
+ *
+ * Description: This function calculates how many integrity bytes the
+ * number of completed data bytes correspond to and advances the
+ * integrity vector accordingly.
+ */
+void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ unsigned int nr_sectors;
+
+ BUG_ON(bip == NULL);
+ BUG_ON(bi == NULL);
+
+ nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
+ bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
+}
+EXPORT_SYMBOL(bio_integrity_advance);
+
+/**
+ * bio_integrity_trim - Trim integrity vector
+ * @bio: bio whose integrity vector to update
+ * @offset: offset to first data sector
+ * @sectors: number of data sectors
+ *
+ * Description: Used to trim the integrity vector in a cloned bio.
+ * The ivec will be advanced corresponding to 'offset' data sectors
+ * and the length will be truncated corresponding to 'len' data
+ * sectors.
+ */
+void bio_integrity_trim(struct bio *bio, unsigned int offset,
+ unsigned int sectors)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ unsigned int nr_sectors;
+
+ BUG_ON(bip == NULL);
+ BUG_ON(bi == NULL);
+ BUG_ON(!bio_flagged(bio, BIO_CLONED));
+
+ nr_sectors = bio_integrity_hw_sectors(bi, sectors);
+ bip->bip_sector = bip->bip_sector + offset;
+ bio_integrity_mark_head(bip, offset * bi->tuple_size);
+ bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
+}
+EXPORT_SYMBOL(bio_integrity_trim);
+
+/**
+ * bio_integrity_split - Split integrity metadata
+ * @bio: Protected bio
+ * @bp: Resulting bio_pair
+ * @sectors: Offset
+ *
+ * Description: Splits an integrity page into a bio_pair.
+ */
+void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
+{
+ struct blk_integrity *bi;
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ unsigned int nr_sectors;
+
+ if (bio_integrity(bio) == 0)
+ return;
+
+ bi = bdev_get_integrity(bio->bi_bdev);
+ BUG_ON(bi == NULL);
+ BUG_ON(bip->bip_vcnt != 1);
+
+ nr_sectors = bio_integrity_hw_sectors(bi, sectors);
+
+ bp->bio1.bi_integrity = &bp->bip1;
+ bp->bio2.bi_integrity = &bp->bip2;
+
+ bp->iv1 = bip->bip_vec[0];
+ bp->iv2 = bip->bip_vec[0];
+
+ bp->bip1.bip_vec = &bp->iv1;
+ bp->bip2.bip_vec = &bp->iv2;
+
+ bp->iv1.bv_len = sectors * bi->tuple_size;
+ bp->iv2.bv_offset += sectors * bi->tuple_size;
+ bp->iv2.bv_len -= sectors * bi->tuple_size;
+
+ bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
+ bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
+
+ bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
+ bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
+}
+EXPORT_SYMBOL(bio_integrity_split);
+
+/**
+ * bio_integrity_clone - Callback for cloning bios with integrity metadata
+ * @bio: New bio
+ * @bio_src: Original bio
+ * @bs: bio_set to allocate bip from
+ *
+ * Description: Called to allocate a bip when cloning a bio
+ */
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+ struct bio_set *bs)
+{
+ struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
+ struct bio_integrity_payload *bip;
+
+ BUG_ON(bip_src == NULL);
+
+ bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
+
+ if (bip == NULL)
+ return -EIO;
+
+ memcpy(bip->bip_vec, bip_src->bip_vec,
+ bip_src->bip_vcnt * sizeof(struct bio_vec));
+
+ bip->bip_sector = bip_src->bip_sector;
+ bip->bip_vcnt = bip_src->bip_vcnt;
+ bip->bip_idx = bip_src->bip_idx;
+
+ return 0;
+}
+EXPORT_SYMBOL(bio_integrity_clone);
+
+int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+ bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
+ bio_integrity_slab);
+ if (!bs->bio_integrity_pool)
+ return -1;
+
+ return 0;
+}
+EXPORT_SYMBOL(bioset_integrity_create);
+
+void bioset_integrity_free(struct bio_set *bs)
+{
+ if (bs->bio_integrity_pool)
+ mempool_destroy(bs->bio_integrity_pool);
+}
+EXPORT_SYMBOL(bioset_integrity_free);
+
+void __init bio_integrity_init_slab(void)
+{
+ bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+}
+EXPORT_SYMBOL(bio_integrity_init_slab);
+
+static int __init integrity_init(void)
+{
+ kintegrityd_wq = create_workqueue("kintegrityd");
+
+ if (!kintegrityd_wq)
+ panic("Failed to create kintegrityd\n");
+
+ return 0;
+}
+subsys_initcall(integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 78562574cb52..88322b066acb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,25 +28,10 @@
#include <linux/blktrace_api.h>
#include <scsi/sg.h> /* for struct sg_iovec */
-#define BIO_POOL_SIZE 2
-
static struct kmem_cache *bio_slab __read_mostly;
-#define BIOVEC_NR_POOLS 6
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
mempool_t *bio_split_pool __read_mostly;
-struct biovec_slab {
- int nr_vecs;
- char *name;
- struct kmem_cache *slab;
-};
-
/*
* if you change this list, also change bvec_alloc or things will
* break badly! cannot be bigger than what you can fit into an
@@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
#undef BV
/*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-struct bio_set {
- mempool_t *bio_pool;
- mempool_t *bvec_pools[BIOVEC_NR_POOLS];
-};
-
-/*
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
* IO code that does not need private memory pools.
*/
-static struct bio_set *fs_bio_set;
+struct bio_set *fs_bio_set;
+
+unsigned int bvec_nr_vecs(unsigned short idx)
+{
+ return bvec_slabs[idx].nr_vecs;
+}
-static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
{
struct bio_vec *bvl;
@@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
}
+ if (bio_integrity(bio))
+ bio_integrity_free(bio, bio_set);
+
mempool_free(bio, bio_set->bio_pool);
}
@@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
- if (b) {
- b->bi_destructor = bio_fs_destructor;
- __bio_clone(b, bio);
+ if (!b)
+ return NULL;
+
+ b->bi_destructor = bio_fs_destructor;
+ __bio_clone(b, bio);
+
+ if (bio_integrity(bio)) {
+ int ret;
+
+ ret = bio_integrity_clone(b, bio, fs_bio_set);
+
+ if (ret < 0)
+ return NULL;
}
return b;
@@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
if (page == prev->bv_page &&
offset == prev->bv_offset + prev->bv_len) {
prev->bv_len += len;
- if (q->merge_bvec_fn &&
- q->merge_bvec_fn(q, bio, prev) < len) {
- prev->bv_len -= len;
- return 0;
+
+ if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+ .bi_size = bio->bi_size,
+ .bi_rw = bio->bi_rw,
+ };
+
+ if (q->merge_bvec_fn(q, &bvm, prev) < len) {
+ prev->bv_len -= len;
+ return 0;
+ }
}
goto done;
@@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* queue to get further control
*/
if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+ .bi_size = bio->bi_size,
+ .bi_rw = bio->bi_rw,
+ };
+
/*
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, bio, bvec) < len) {
+ if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
bvec->bv_page = NULL;
bvec->bv_len = 0;
bvec->bv_offset = 0;
@@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
bp->bio1.bi_private = bi;
bp->bio2.bi_private = pool;
+ if (bio_integrity(bi))
+ bio_integrity_split(bi, bp, first_sectors);
+
return bp;
}
@@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs)
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);
+ bioset_integrity_free(bs);
biovec_free_pools(bs);
kfree(bs);
@@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
if (!bs->bio_pool)
goto bad;
+ if (bioset_integrity_create(bs, bio_pool_size))
+ goto bad;
+
if (!biovec_create_pools(bs, bvec_pool_size))
return bs;
@@ -1332,6 +1347,7 @@ static int __init init_bio(void)
{
bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ bio_integrity_init_slab();
biovec_init_slabs();
fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 9590b9024300..78f613cb9c76 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = {
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0989bc2c2f69..52312ec93ff4 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = {
.aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/splice.c b/fs/splice.c
index aa5f6f60b305..399442179d89 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
lock_page(page);
/*
- * page was truncated, stop here. if this isn't the
- * first page, we'll just complete what we already
- * added
+ * Page was truncated, or invalidated by the
+ * filesystem. Redo the find/create, but this time the
+ * page is kept locked, so there's no chance of another
+ * race with truncate/invalidate.
*/
if (!page->mapping) {
unlock_page(page);
- break;
+ page = find_or_create_page(mapping, index,
+ mapping_gfp_mask(mapping));
+
+ if (!page) {
+ error = -ENOMEM;
+ break;
+ }
+ page_cache_release(pages[page_nr]);
+ pages[page_nr] = page;
}
/*
* page was already under io and is now done, great
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 61c15eaf3fb3..0933a14e6414 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -64,6 +64,7 @@ struct bio_vec {
struct bio_set;
struct bio;
+struct bio_integrity_payload;
typedef void (bio_end_io_t) (struct bio *, int);
typedef void (bio_destructor_t) (struct bio *);
@@ -112,6 +113,9 @@ struct bio {
atomic_t bi_cnt; /* pin count */
void *bi_private;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ struct bio_integrity_payload *bi_integrity; /* data integrity */
+#endif
bio_destructor_t *bi_destructor; /* destructor */
};
@@ -271,6 +275,29 @@ static inline void *bio_data(struct bio *bio)
*/
#define bio_get(bio) atomic_inc(&(bio)->bi_cnt)
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+/*
+ * bio integrity payload
+ */
+struct bio_integrity_payload {
+ struct bio *bip_bio; /* parent bio */
+ struct bio_vec *bip_vec; /* integrity data vector */
+
+ sector_t bip_sector; /* virtual start sector */
+
+ void *bip_buf; /* generated integrity data */
+ bio_end_io_t *bip_end_io; /* saved I/O completion fn */
+
+ int bip_error; /* saved I/O error */
+ unsigned int bip_size;
+
+ unsigned short bip_pool; /* pool the ivec came from */
+ unsigned short bip_vcnt; /* # of integrity bio_vecs */
+ unsigned short bip_idx; /* current bip_vec index */
+
+ struct work_struct bip_work; /* I/O completion */
+};
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
/*
* A bio_pair is used when we need to split a bio.
@@ -283,10 +310,14 @@ static inline void *bio_data(struct bio *bio)
* in bio2.bi_private
*/
struct bio_pair {
- struct bio bio1, bio2;
- struct bio_vec bv1, bv2;
- atomic_t cnt;
- int error;
+ struct bio bio1, bio2;
+ struct bio_vec bv1, bv2;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ struct bio_integrity_payload bip1, bip2;
+ struct bio_vec iv1, iv2;
+#endif
+ atomic_t cnt;
+ int error;
};
extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
int first_sectors);
@@ -333,6 +364,39 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *,
int, int);
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio(struct bio *bio);
+extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
+extern unsigned int bvec_nr_vecs(unsigned short idx);
+
+/*
+ * bio_set is used to allow other portions of the IO system to
+ * allocate their own private memory pools for bio and iovec structures.
+ * These memory pools in turn all allocate from the bio_slab
+ * and the bvec_slabs[].
+ */
+#define BIO_POOL_SIZE 2
+#define BIOVEC_NR_POOLS 6
+
+struct bio_set {
+ mempool_t *bio_pool;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ mempool_t *bio_integrity_pool;
+#endif
+ mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+};
+
+struct biovec_slab {
+ int nr_vecs;
+ char *name;
+ struct kmem_cache *slab;
+};
+
+extern struct bio_set *fs_bio_set;
+
+/*
+ * a small number of entries is fine, not going to be performance critical.
+ * basically we just need to survive
+ */
+#define BIO_SPLIT_ENTRIES 2
#ifdef CONFIG_HIGHMEM
/*
@@ -381,5 +445,63 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
+#define bip_vec(bip) bip_vec_idx(bip, 0)
+
+#define __bip_for_each_vec(bvl, bip, i, start_idx) \
+ for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \
+ i < (bip)->bip_vcnt; \
+ bvl++, i++)
+
+#define bip_for_each_vec(bvl, bip, i) \
+ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
+
+static inline int bio_integrity(struct bio *bio)
+{
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ return bio->bi_integrity != NULL;
+#else
+ return 0;
+#endif
+}
+
+extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
+extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
+extern void bio_integrity_free(struct bio *, struct bio_set *);
+extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
+extern int bio_integrity_enabled(struct bio *bio);
+extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
+extern int bio_integrity_get_tag(struct bio *, void *, unsigned int);
+extern int bio_integrity_prep(struct bio *);
+extern void bio_integrity_endio(struct bio *, int);
+extern void bio_integrity_advance(struct bio *, unsigned int);
+extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
+extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *);
+extern int bioset_integrity_create(struct bio_set *, int);
+extern void bioset_integrity_free(struct bio_set *);
+extern void bio_integrity_init_slab(void);
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+#define bio_integrity(a) (0)
+#define bioset_integrity_create(a, b) (0)
+#define bio_integrity_prep(a) (0)
+#define bio_integrity_enabled(a) (0)
+#define bio_integrity_clone(a, b, c) (0)
+#define bioset_integrity_free(a) do { } while (0)
+#define bio_integrity_free(a, b) do { } while (0)
+#define bio_integrity_endio(a, b) do { } while (0)
+#define bio_integrity_advance(a, b) do { } while (0)
+#define bio_integrity_trim(a, b, c) do { } while (0)
+#define bio_integrity_split(a, b, c) do { } while (0)
+#define bio_integrity_set_tag(a, b, c) do { } while (0)
+#define bio_integrity_get_tag(a, b, c) do { } while (0)
+#define bio_integrity_init_slab(a) do { } while (0)
+
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
#endif /* CONFIG_BLOCK */
#endif /* __LINUX_BIO_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2a1b71e93c3..1ffd8bfdc4c9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -23,7 +23,6 @@
struct scsi_ioctl_command;
struct request_queue;
-typedef struct request_queue request_queue_t __deprecated;
struct elevator_queue;
typedef struct elevator_queue elevator_t;
struct request_pm_state;
@@ -34,12 +33,6 @@ struct sg_io_hdr;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
-int put_io_context(struct io_context *ioc);
-void exit_io_context(void);
-struct io_context *get_io_context(gfp_t gfp_flags, int node);
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
-void copy_io_context(struct io_context **pdst, struct io_context **psrc);
-
struct request;
typedef void (rq_end_io_fn)(struct request *, int);
@@ -113,6 +106,7 @@ enum rq_flag_bits {
__REQ_ALLOCED, /* request came from our alloc pool */
__REQ_RW_META, /* metadata io request */
__REQ_COPY_USER, /* contains copies of user pages */
+ __REQ_INTEGRITY, /* integrity metadata has been remapped */
__REQ_NR_BITS, /* stops here */
};
@@ -135,6 +129,7 @@ enum rq_flag_bits {
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
#define REQ_RW_META (1 << __REQ_RW_META)
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
+#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
#define BLK_MAX_CDB 16
@@ -259,7 +254,14 @@ typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unplug_fn) (struct request_queue *);
struct bio_vec;
-typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *);
+struct bvec_merge_data {
+ struct block_device *bi_bdev;
+ sector_t bi_sector;
+ unsigned bi_size;
+ unsigned long bi_rw;
+};
+typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
+ struct bio_vec *);
typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
typedef void (softirq_done_fn)(struct request *);
typedef int (dma_drain_needed_fn)(struct request *);
@@ -426,6 +428,32 @@ static inline void queue_flag_set_unlocked(unsigned int flag,
__set_bit(flag, &q->queue_flags);
}
+static inline int queue_flag_test_and_clear(unsigned int flag,
+ struct request_queue *q)
+{
+ WARN_ON_ONCE(!queue_is_locked(q));
+
+ if (test_bit(flag, &q->queue_flags)) {
+ __clear_bit(flag, &q->queue_flags);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int queue_flag_test_and_set(unsigned int flag,
+ struct request_queue *q)
+{
+ WARN_ON_ONCE(!queue_is_locked(q));
+
+ if (!test_bit(flag, &q->queue_flags)) {
+ __set_bit(flag, &q->queue_flags);
+ return 0;
+ }
+
+ return 1;
+}
+
static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
{
WARN_ON_ONCE(!queue_is_locked(q));
@@ -676,7 +704,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
-extern int blk_verify_command(unsigned char *, int);
extern void blk_unplug(struct request_queue *q);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -749,6 +776,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
+extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern int blk_queue_dma_drain(struct request_queue *q,
dma_drain_needed_fn *dma_drain_needed,
void *buf, unsigned int size);
@@ -802,6 +830,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, sector_t *);
+/*
+* command filter functions
+*/
+extern int blk_verify_command(struct file *file, unsigned char *cmd);
+extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+ unsigned char *cmd, mode_t *f_mode);
+extern int blk_register_filter(struct gendisk *disk);
+extern void blk_unregister_filter(struct gendisk *disk);
+
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define SAFE_MAX_SECTORS 255
@@ -865,28 +902,116 @@ void kblockd_flush_work(struct work_struct *work);
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
MODULE_ALIAS("block-major-" __stringify(major) "-*")
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
-#else /* CONFIG_BLOCK */
-/*
- * stubs for when the block layer is configured out
- */
-#define buffer_heads_over_limit 0
+#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */
+#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */
-static inline long nr_blockdev_pages(void)
+struct blk_integrity_exchg {
+ void *prot_buf;
+ void *data_buf;
+ sector_t sector;
+ unsigned int data_size;
+ unsigned short sector_size;
+ const char *disk_name;
+};
+
+typedef void (integrity_gen_fn) (struct blk_integrity_exchg *);
+typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *);
+typedef void (integrity_set_tag_fn) (void *, void *, unsigned int);
+typedef void (integrity_get_tag_fn) (void *, void *, unsigned int);
+
+struct blk_integrity {
+ integrity_gen_fn *generate_fn;
+ integrity_vrfy_fn *verify_fn;
+ integrity_set_tag_fn *set_tag_fn;
+ integrity_get_tag_fn *get_tag_fn;
+
+ unsigned short flags;
+ unsigned short tuple_size;
+ unsigned short sector_size;
+ unsigned short tag_size;
+
+ const char *name;
+
+ struct kobject kobj;
+};
+
+extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
+extern void blk_integrity_unregister(struct gendisk *);
+extern int blk_integrity_compare(struct block_device *, struct block_device *);
+extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
+extern int blk_rq_count_integrity_sg(struct request *);
+
+static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
{
+ if (bi)
+ return bi->tuple_size;
+
return 0;
}
-static inline void exit_io_context(void)
+static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
{
+ return bdev->bd_disk->integrity;
}
-struct io_context;
-static inline int put_io_context(struct io_context *ioc)
+static inline unsigned int bdev_get_tag_size(struct block_device *bdev)
{
- return 1;
+ struct blk_integrity *bi = bdev_get_integrity(bdev);
+
+ if (bi)
+ return bi->tag_size;
+
+ return 0;
+}
+
+static inline int bdev_integrity_enabled(struct block_device *bdev, int rw)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bdev);
+
+ if (bi == NULL)
+ return 0;
+
+ if (rw == READ && bi->verify_fn != NULL &&
+ (bi->flags & INTEGRITY_FLAG_READ))
+ return 1;
+
+ if (rw == WRITE && bi->generate_fn != NULL &&
+ (bi->flags & INTEGRITY_FLAG_WRITE))
+ return 1;
+
+ return 0;
}
+static inline int blk_integrity_rq(struct request *rq)
+{
+ return bio_integrity(rq->bio);
+}
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+#define blk_integrity_rq(rq) (0)
+#define blk_rq_count_integrity_sg(a) (0)
+#define blk_rq_map_integrity_sg(a, b) (0)
+#define bdev_get_integrity(a) (0)
+#define bdev_get_tag_size(a) (0)
+#define blk_integrity_compare(a, b) (0)
+#define blk_integrity_register(a, b) (0)
+#define blk_integrity_unregister(a) do { } while (0);
+
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+#else /* CONFIG_BLOCK */
+/*
+ * stubs for when the block layer is configured out
+ */
+#define buffer_heads_over_limit 0
+
+static inline long nr_blockdev_pages(void)
+{
+ return 0;
+}
#endif /* CONFIG_BLOCK */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index e3ef903aae88..d084b8d227a5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -129,6 +129,7 @@ struct blk_trace {
u32 dev;
struct dentry *dir;
struct dentry *dropped_file;
+ struct dentry *msg_file;
atomic_t dropped;
};
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ae7aec3cabee..e8787417f65a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -110,6 +110,14 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
+#define BLK_SCSI_MAX_CMDS (256)
+#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+ unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+ unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+ struct kobject kobj;
+};
struct gendisk {
int major; /* major number of driver */
@@ -120,6 +128,7 @@ struct gendisk {
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
+ struct blk_scsi_cmd_filter cmd_filter;
void *private_data;
sector_t capacity;
@@ -141,6 +150,9 @@ struct gendisk {
struct disk_stats dkstats;
#endif
struct work_struct async_notify;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ struct blk_integrity *integrity;
+#endif
};
/*
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 2b7a1187cb29..08b987bccf89 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -99,4 +99,22 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
return NULL;
}
+#ifdef CONFIG_BLOCK
+int put_io_context(struct io_context *ioc);
+void exit_io_context(void);
+struct io_context *get_io_context(gfp_t gfp_flags, int node);
+struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+void copy_io_context(struct io_context **pdst, struct io_context **psrc);
+#else
+static inline void exit_io_context(void)
+{
+}
+
+struct io_context;
+static inline int put_io_context(struct io_context *ioc)
+{
+ return 1;
+}
+#endif
+
#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index 8f6185e69b69..ceb258782835 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -13,6 +13,7 @@
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/mnt_namespace.h>
+#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/security.h>
#include <linux/cpu.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..b71ccd09fc8d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -23,6 +23,7 @@
#include <linux/sem.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>