summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorPierre Ossman <drzeus@drzeus.cx>2008-12-31 19:56:05 +0100
committerPierre Ossman <drzeus@drzeus.cx>2008-12-31 19:56:05 +0100
commit418f19ea17a99421b22a64e101e14b6a16bed66d (patch)
tree7c21fcc368c63f1f9907deac6d16b30bd371792d /block
parentsdricoh_cs: Add support for Bay Controller devices (diff)
parentmmc: warn about voltage mismatches (diff)
downloadlinux-418f19ea17a99421b22a64e101e14b6a16bed66d.tar.xz
linux-418f19ea17a99421b22a64e101e14b6a16bed66d.zip
Merge branch 'master' of ../mmc
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig24
-rw-r--r--block/as-iosched.c10
-rw-r--r--block/blk-barrier.c124
-rw-r--r--block/blk-core.c135
-rw-r--r--block/blk-map.c2
-rw-r--r--block/blk-settings.c10
-rw-r--r--block/blk-softirq.c2
-rw-r--r--block/blk-sysfs.c7
-rw-r--r--block/blk-tag.c1
-rw-r--r--block/blk-timeout.c21
-rw-r--r--block/blktrace.c332
-rw-r--r--block/bsg.c2
-rw-r--r--block/cfq-iosched.c26
-rw-r--r--block/compat_ioctl.c33
-rw-r--r--block/deadline-iosched.c6
-rw-r--r--block/elevator.c92
-rw-r--r--block/genhd.c25
-rw-r--r--block/ioctl.c2
-rw-r--r--block/noop-iosched.c2
-rw-r--r--block/scsi_ioctl.c6
20 files changed, 628 insertions, 234 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 1ab7c15c8d7a..ac0956f77785 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -24,21 +24,17 @@ menuconfig BLOCK
if BLOCK
config LBD
- bool "Support for Large Block Devices"
+ bool "Support for large block devices and files"
depends on !64BIT
help
- Enable block devices of size 2TB and larger.
+ Enable block devices or files of size 2TB and larger.
This option is required to support the full capacity of large
(2TB+) block devices, including RAID, disk, Network Block Device,
Logical Volume Manager (LVM) and loopback.
-
- For example, RAID devices are frequently bigger than the capacity
- of the largest individual hard drive.
-
- This option is not required if you have individual disk drives
- which total 2TB+ and you are not aggregating the capacity into
- a large block device (e.g. using RAID or LVM).
+
+ This option also enables support for single files larger than
+ 2TB.
If unsure, say N.
@@ -47,6 +43,7 @@ config BLK_DEV_IO_TRACE
depends on SYSFS
select RELAY
select DEBUG_FS
+ select TRACEPOINTS
help
Say Y here if you want to be able to trace the block layer actions
on a given queue. Tracing allows you to see any traffic happening
@@ -57,15 +54,6 @@ config BLK_DEV_IO_TRACE
If unsure, say N.
-config LSF
- bool "Support for Large Single Files"
- depends on !64BIT
- help
- Say Y here if you want to be able to handle very large files (2TB
- and larger), otherwise say N.
-
- If unsure, say Y.
-
config BLK_DEV_BSG
bool "Block layer SG support v4 (EXPERIMENTAL)"
depends on EXPERIMENTAL
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 71f0abb219ee..631f6f44460a 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,12 +1339,12 @@ static int as_may_queue(struct request_queue *q, int rw)
return ret;
}
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
{
struct as_data *ad = e->elevator_data;
del_timer_sync(&ad->antic_timer);
- kblockd_flush_work(&ad->antic_work);
+ cancel_work_sync(&ad->antic_work);
BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
@@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
{
struct as_data *ad = e->elevator_data;
int pos = 0;
@@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
}
#define SHOW_FUNCTION(__FUNC, __VAR) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct as_data *ad = e->elevator_data; \
return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
@@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct as_data *ad = e->elevator_data; \
int ret = as_var_store(__PTR, (page), count); \
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 5c99ff8d2db8..8eba4e43bb0c 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,8 +24,8 @@
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
prepare_flush_fn *prepare_flush_fn)
{
- if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
- prepare_flush_fn == NULL) {
+ if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+ QUEUE_ORDERED_DO_POSTFLUSH))) {
printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
return -EINVAL;
}
@@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
return QUEUE_ORDSEQ_DONE;
}
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
struct request *rq;
@@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
q->ordseq |= seq;
if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
- return;
+ return false;
/*
* Okay, sequence complete.
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
BUG();
+
+ return true;
}
static void pre_flush_end_io(struct request *rq, int error)
@@ -134,7 +136,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
struct request *rq;
rq_end_io_fn *end_io;
- if (which == QUEUE_ORDERED_PREFLUSH) {
+ if (which == QUEUE_ORDERED_DO_PREFLUSH) {
rq = &q->pre_flush_rq;
end_io = pre_flush_end_io;
} else {
@@ -151,80 +153,110 @@ static void queue_flush(struct request_queue *q, unsigned which)
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}
-static inline struct request *start_ordered(struct request_queue *q,
- struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
+ struct request *rq = *rqp;
+ unsigned skip = 0;
+
q->orderr = 0;
q->ordered = q->next_ordered;
q->ordseq |= QUEUE_ORDSEQ_STARTED;
/*
- * Prep proxy barrier request.
+ * For an empty barrier, there's no actual BAR request, which
+ * in turn makes POSTFLUSH unnecessary. Mask them off.
*/
- blkdev_dequeue_request(rq);
+ if (!rq->hard_nr_sectors) {
+ q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+ QUEUE_ORDERED_DO_POSTFLUSH);
+ /*
+ * Empty barrier on a write-through device w/ ordered
+ * tag has no command to issue and without any command
+ * to issue, ordering by tag can't be used. Drain
+ * instead.
+ */
+ if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
+ !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
+ q->ordered &= ~QUEUE_ORDERED_BY_TAG;
+ q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+ }
+ }
+
+ /* stash away the original request */
+ elv_dequeue_request(q, rq);
q->orig_bar_rq = rq;
- rq = &q->bar_rq;
- blk_rq_init(q, rq);
- if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
- rq->cmd_flags |= REQ_RW;
- if (q->ordered & QUEUE_ORDERED_FUA)
- rq->cmd_flags |= REQ_FUA;
- init_request_from_bio(rq, q->orig_bar_rq->bio);
- rq->end_io = bar_end_io;
+ rq = NULL;
/*
* Queue ordered sequence. As we stack them at the head, we
* need to queue in reverse order. Note that we rely on that
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
- * request gets inbetween ordered sequence. If this request is
- * an empty barrier, we don't need to do a postflush ever since
- * there will be no data written between the pre and post flush.
- * Hence a single flush will suffice.
+ * request gets inbetween ordered sequence.
*/
- if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
- queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
- else
- q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+ if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
+ queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
+ rq = &q->post_flush_rq;
+ } else
+ skip |= QUEUE_ORDSEQ_POSTFLUSH;
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+ rq = &q->bar_rq;
+
+ /* initialize proxy request and queue it */
+ blk_rq_init(q, rq);
+ if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+ rq->cmd_flags |= REQ_RW;
+ if (q->ordered & QUEUE_ORDERED_DO_FUA)
+ rq->cmd_flags |= REQ_FUA;
+ init_request_from_bio(rq, q->orig_bar_rq->bio);
+ rq->end_io = bar_end_io;
- if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
- queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+ elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ } else
+ skip |= QUEUE_ORDSEQ_BAR;
+
+ if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+ queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
rq = &q->pre_flush_rq;
} else
- q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+ skip |= QUEUE_ORDSEQ_PREFLUSH;
- if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
- q->ordseq |= QUEUE_ORDSEQ_DRAIN;
- else
+ if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
rq = NULL;
+ else
+ skip |= QUEUE_ORDSEQ_DRAIN;
+
+ *rqp = rq;
- return rq;
+ /*
+ * Complete skipped sequences. If whole sequence is complete,
+ * return false to tell elevator that this request is gone.
+ */
+ return !blk_ordered_complete_seq(q, skip, 0);
}
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
struct request *rq = *rqp;
const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
if (!q->ordseq) {
if (!is_barrier)
- return 1;
+ return true;
- if (q->next_ordered != QUEUE_ORDERED_NONE) {
- *rqp = start_ordered(q, rq);
- return 1;
- } else {
+ if (q->next_ordered != QUEUE_ORDERED_NONE)
+ return start_ordered(q, rqp);
+ else {
/*
- * This can happen when the queue switches to
- * ORDERED_NONE while this request is on it.
+ * Queue ordering not supported. Terminate
+ * with prejudice.
*/
- blkdev_dequeue_request(rq);
+ elv_dequeue_request(q, rq);
if (__blk_end_request(rq, -EOPNOTSUPP,
blk_rq_bytes(rq)))
BUG();
*rqp = NULL;
- return 0;
+ return false;
}
}
@@ -235,9 +267,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
/* Special requests are not subject to ordering rules. */
if (!blk_fs_request(rq) &&
rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
- return 1;
+ return true;
- if (q->ordered & QUEUE_ORDERED_TAG) {
+ if (q->ordered & QUEUE_ORDERED_BY_TAG) {
/* Ordered by tag. Blocking the next barrier is enough. */
if (is_barrier && rq != &q->bar_rq)
*rqp = NULL;
@@ -248,7 +280,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
*rqp = NULL;
}
- return 1;
+ return true;
}
static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index 10e8a64a5a5b..a824e49c0d0a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -28,9 +28,23 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
+#include <trace/block.h>
#include "blk.h"
+DEFINE_TRACE(block_plug);
+DEFINE_TRACE(block_unplug_io);
+DEFINE_TRACE(block_unplug_timer);
+DEFINE_TRACE(block_getrq);
+DEFINE_TRACE(block_sleeprq);
+DEFINE_TRACE(block_rq_requeue);
+DEFINE_TRACE(block_bio_backmerge);
+DEFINE_TRACE(block_bio_frontmerge);
+DEFINE_TRACE(block_bio_queue);
+DEFINE_TRACE(block_rq_complete);
+DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+
static int __make_request(struct request_queue *q, struct bio *bio);
/*
@@ -139,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
nbytes = bio->bi_size;
}
+ if (unlikely(rq->cmd_flags & REQ_QUIET))
+ set_bit(BIO_QUIET, &bio->bi_flags);
+
bio->bi_size -= nbytes;
bio->bi_sector += (nbytes >> 9);
@@ -205,7 +222,7 @@ void blk_plug_device(struct request_queue *q)
if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
- blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+ trace_block_plug(q);
}
}
EXPORT_SYMBOL(blk_plug_device);
@@ -251,8 +268,7 @@ void __generic_unplug_device(struct request_queue *q)
{
if (unlikely(blk_queue_stopped(q)))
return;
-
- if (!blk_remove_plug(q))
+ if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
return;
q->request_fn(q);
@@ -292,9 +308,7 @@ void blk_unplug_work(struct work_struct *work)
struct request_queue *q =
container_of(work, struct request_queue, unplug_work);
- blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
- q->rq.count[READ] + q->rq.count[WRITE]);
-
+ trace_block_unplug_io(q);
q->unplug_fn(q);
}
@@ -302,9 +316,7 @@ void blk_unplug_timeout(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
- blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
- q->rq.count[READ] + q->rq.count[WRITE]);
-
+ trace_block_unplug_timer(q);
kblockd_schedule_work(q, &q->unplug_work);
}
@@ -314,9 +326,7 @@ void blk_unplug(struct request_queue *q)
* devices don't necessarily have an ->unplug_fn defined
*/
if (q->unplug_fn) {
- blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
- q->rq.count[READ] + q->rq.count[WRITE]);
-
+ trace_block_unplug_io(q);
q->unplug_fn(q);
}
}
@@ -396,7 +406,8 @@ EXPORT_SYMBOL(blk_stop_queue);
void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->unplug_timer);
- kblockd_flush_work(&q->unplug_work);
+ del_timer_sync(&q->timeout);
+ cancel_work_sync(&q->unplug_work);
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -592,7 +603,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1 << QUEUE_FLAG_STACKABLE);
q->queue_lock = lock;
- blk_queue_segment_boundary(q, 0xffffffff);
+ blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
blk_queue_make_request(q, __make_request);
blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
@@ -822,7 +833,7 @@ rq_starved:
if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;
- blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
+ trace_block_getrq(q, bio, rw);
out:
return rq;
}
@@ -848,7 +859,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE);
- blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+ trace_block_sleeprq(q, bio, rw);
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
@@ -928,7 +939,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
{
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
- blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+ trace_block_rq_requeue(q, rq);
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
@@ -1127,7 +1138,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
- int el_ret, nr_sectors, barrier, discard, err;
+ int el_ret, nr_sectors;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
int rw_flags;
@@ -1141,22 +1152,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
*/
blk_queue_bounce(q, &bio);
- barrier = bio_barrier(bio);
- if (unlikely(barrier) && bio_has_data(bio) &&
- (q->next_ordered == QUEUE_ORDERED_NONE)) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
-
- discard = bio_discard(bio);
- if (unlikely(discard) && !q->prepare_discard_fn) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
-
spin_lock_irq(q->queue_lock);
- if (unlikely(barrier) || elv_queue_empty(q))
+ if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1167,7 +1165,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
if (!ll_back_merge_fn(q, req, bio))
break;
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+ trace_block_bio_backmerge(q, bio);
req->biotail->bi_next = bio;
req->biotail = bio;
@@ -1186,7 +1184,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
if (!ll_front_merge_fn(q, req, bio))
break;
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+ trace_block_bio_frontmerge(q, bio);
bio->bi_next = req->bio;
req->bio = bio;
@@ -1242,18 +1240,14 @@ get_rq:
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
bio_flagged(bio, BIO_CPU_AFFINE))
req->cpu = blk_cpu_to_group(smp_processor_id());
- if (elv_queue_empty(q))
+ if (!blk_queue_nonrot(q) && elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
- if (sync)
+ if (sync || blk_queue_nonrot(q))
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
return 0;
-
-end_io:
- bio_endio(bio, err);
- return 0;
}
/*
@@ -1269,7 +1263,7 @@ static inline void blk_partition_remap(struct bio *bio)
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
- blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
+ trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
bdev->bd_dev, bio->bi_sector,
bio->bi_sector - p->start_sect);
}
@@ -1406,15 +1400,13 @@ static inline void __generic_make_request(struct bio *bio)
char b[BDEVNAME_SIZE];
q = bdev_get_queue(bio->bi_bdev);
- if (!q) {
+ if (unlikely(!q)) {
printk(KERN_ERR
"generic_make_request: Trying to access "
"nonexistent block-device %s (%Lu)\n",
bdevname(bio->bi_bdev, b),
(long long) bio->bi_sector);
-end_io:
- bio_endio(bio, err);
- break;
+ goto end_io;
}
if (unlikely(nr_sectors > q->max_hw_sectors)) {
@@ -1441,24 +1433,29 @@ end_io:
goto end_io;
if (old_sector != -1)
- blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
+ trace_block_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+ trace_block_bio_queue(q, bio);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
if (bio_check_eod(bio, nr_sectors))
goto end_io;
- if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
- (bio_discard(bio) && !q->prepare_discard_fn)) {
+
+ if (bio_discard(bio) && !q->prepare_discard_fn) {
err = -EOPNOTSUPP;
goto end_io;
}
ret = q->make_request_fn(q, bio);
} while (ret);
+
+ return;
+
+end_io:
+ bio_endio(bio, err);
}
/*
@@ -1637,6 +1634,28 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
/**
+ * blkdev_dequeue_request - dequeue request and start timeout timer
+ * @req: request to dequeue
+ *
+ * Dequeue @req and start timeout timer on it. This hands off the
+ * request to the driver.
+ *
+ * Block internal functions which don't want to start timer should
+ * call elv_dequeue_request().
+ */
+void blkdev_dequeue_request(struct request *req)
+{
+ elv_dequeue_request(req->q, req);
+
+ /*
+ * We are now handing the request to the hardware, add the
+ * timeout handler.
+ */
+ blk_add_timer(req);
+}
+EXPORT_SYMBOL(blkdev_dequeue_request);
+
+/**
* __end_that_request_first - end I/O on a request
* @req: the request being processed
* @error: %0 for success, < %0 for error
@@ -1656,7 +1675,7 @@ static int __end_that_request_first(struct request *req, int error,
int total_bytes, bio_nbytes, next_idx = 0;
struct bio *bio;
- blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+ trace_block_rq_complete(req->q, req);
/*
* for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
@@ -1686,14 +1705,6 @@ static int __end_that_request_first(struct request *req, int error,
while ((bio = req->bio) != NULL) {
int nbytes;
- /*
- * For an empty barrier request, the low level driver must
- * store a potential error location in ->sector. We pass
- * that back up in ->bi_sector.
- */
- if (blk_empty_barrier(req))
- bio->bi_sector = req->sector;
-
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
nbytes = bio->bi_size;
@@ -1774,7 +1785,7 @@ static void end_that_request_last(struct request *req, int error)
blk_queue_end_tag(req->q, req);
if (blk_queued_rq(req))
- blkdev_dequeue_request(req);
+ elv_dequeue_request(req->q, req);
if (unlikely(laptop_mode) && blk_fs_request(req))
laptop_io_completion();
@@ -2113,12 +2124,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
}
EXPORT_SYMBOL(kblockd_schedule_work);
-void kblockd_flush_work(struct work_struct *work)
-{
- cancel_work_sync(work);
-}
-EXPORT_SYMBOL(kblockd_flush_work);
-
int __init blk_dev_init(void)
{
kblockd_workqueue = create_workqueue("kblockd");
diff --git a/block/blk-map.c b/block/blk-map.c
index 0f4b4b881811..2990447f45e9 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -224,7 +224,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
*/
bio_get(bio);
bio_endio(bio, 0);
- bio_unmap_user(bio);
+ __blk_rq_unmap_user(bio);
return -EINVAL;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 41392fbe19ff..59fd05d9f1d5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -125,6 +125,9 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->nr_requests = BLKDEV_MAX_RQ;
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
+ blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
+ blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
+
q->make_request_fn = mfn;
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -314,10 +317,11 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
/* zero is "infinity" */
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+ t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
- t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
- t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
- t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
+ t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
+ t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
+ t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
if (!t->queue_lock)
WARN_ON_ONCE(1);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index e660d26ca656..ce0efc6b26dc 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -161,7 +161,7 @@ void blk_complete_request(struct request *req)
}
EXPORT_SYMBOL(blk_complete_request);
-__init int blk_softirq_init(void)
+static __init int blk_softirq_init(void)
{
int i;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 21e275d7eed9..a29cb788e408 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -88,9 +88,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
unsigned long ra_kb;
ssize_t ret = queue_var_store(&ra_kb, page, count);
- spin_lock_irq(q->queue_lock);
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
- spin_unlock_irq(q->queue_lock);
return ret;
}
@@ -117,10 +115,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
- /*
- * Take the queue lock to update the readahead and max_sectors
- * values synchronously:
- */
+
spin_lock_irq(q->queue_lock);
q->max_sectors = max_sectors_kb << 1;
spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index c0d419e84ce7..3c518e3303ae 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -158,7 +158,6 @@ fail:
/**
* blk_init_tags - initialize the tag info for an external tag map
* @depth: the maximum queue depth supported
- * @tags: the tag to use
**/
struct blk_queue_tag *blk_init_tags(int depth)
{
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 69185ea9fae2..a09535377a94 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -73,11 +73,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
*/
void blk_delete_timer(struct request *req)
{
- struct request_queue *q = req->q;
-
list_del_init(&req->timeout_list);
- if (list_empty(&q->timeout_list))
- del_timer(&q->timeout);
}
static void blk_rq_timed_out(struct request *req)
@@ -111,7 +107,7 @@ static void blk_rq_timed_out(struct request *req)
void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *) data;
- unsigned long flags, uninitialized_var(next), next_set = 0;
+ unsigned long flags, next = 0;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
@@ -126,15 +122,18 @@ void blk_rq_timed_out_timer(unsigned long data)
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
+ } else {
+ if (!next || time_after(next, rq->deadline))
+ next = rq->deadline;
}
- if (!next_set) {
- next = rq->deadline;
- next_set = 1;
- } else if (time_after(next, rq->deadline))
- next = rq->deadline;
}
- if (next_set && !list_empty(&q->timeout_list))
+ /*
+ * next can never be 0 here with the list non-empty, since we always
+ * bump ->deadline to 1 so we can detect if the timer was ever added
+ * or not. See comment in blk_add_timer()
+ */
+ if (next)
mod_timer(&q->timeout, round_jiffies_up(next));
spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/blktrace.c b/block/blktrace.c
index 85049a7e7a17..b0a2cae886db 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -23,10 +23,18 @@
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/time.h>
+#include <trace/block.h>
#include <asm/uaccess.h>
static unsigned int blktrace_seq __read_mostly = 1;
+/* Global reference count of probes */
+static DEFINE_MUTEX(blk_probe_mutex);
+static atomic_t blk_probes_ref = ATOMIC_INIT(0);
+
+static int blk_register_tracepoints(void);
+static void blk_unregister_tracepoints(void);
+
/*
* Send out a notify message.
*/
@@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
* The worker for the various blk_add_trace*() types. Fills out a
* blk_io_trace structure and places it in a per-cpu subbuffer.
*/
-void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
+static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
int rw, u32 what, int error, int pdu_len, void *pdu_data)
{
struct task_struct *tsk = current;
@@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(__blk_add_trace);
-
static struct dentry *blk_tree_root;
static DEFINE_MUTEX(blk_tree_mutex);
static unsigned int root_users;
@@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt)
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
+ mutex_lock(&blk_probe_mutex);
+ if (atomic_dec_and_test(&blk_probes_ref))
+ blk_unregister_tracepoints();
+ mutex_unlock(&blk_probe_mutex);
}
int blk_trace_remove(struct request_queue *q)
@@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
+ mutex_lock(&blk_probe_mutex);
+ if (atomic_add_return(1, &blk_probes_ref) == 1) {
+ ret = blk_register_tracepoints();
+ if (ret)
+ goto probe_err;
+ }
+ mutex_unlock(&blk_probe_mutex);
+
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt) {
@@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
return 0;
+probe_err:
+ atomic_dec(&blk_probes_ref);
+ mutex_unlock(&blk_probe_mutex);
err:
if (dir)
blk_remove_tree(dir);
@@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q)
blk_trace_remove(q);
}
}
+
+/*
+ * blktrace probes
+ */
+
+/**
+ * blk_add_trace_rq - Add a trace for a request oriented action
+ * @q: queue the io is for
+ * @rq: the source request
+ * @what: the action
+ *
+ * Description:
+ * Records an action against a request. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+ u32 what)
+{
+ struct blk_trace *bt = q->blk_trace;
+ int rw = rq->cmd_flags & 0x03;
+
+ if (likely(!bt))
+ return;
+
+ if (blk_discard_rq(rq))
+ rw |= (1 << BIO_RW_DISCARD);
+
+ if (blk_pc_request(rq)) {
+ what |= BLK_TC_ACT(BLK_TC_PC);
+ __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
+ sizeof(rq->cmd), rq->cmd);
+ } else {
+ what |= BLK_TC_ACT(BLK_TC_FS);
+ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+ rw, what, rq->errors, 0, NULL);
+ }
+}
+
+static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+}
+
+static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+}
+
+static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+}
+
+static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+}
+
+static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
+}
+
+/**
+ * blk_add_trace_bio - Add a trace for a bio oriented action
+ * @q: queue the io is for
+ * @bio: the source bio
+ * @what: the action
+ *
+ * Description:
+ * Records an action against a bio. Will log the bio offset + size.
+ *
+ **/
+static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+ u32 what)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (likely(!bt))
+ return;
+
+ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
+ !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
+}
+
+static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
+}
+
+static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+}
+
+static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+}
+
+static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
+{
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+}
+
+static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
+{
+ if (bio)
+ blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
+ else {
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt)
+ __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
+ }
+}
+
+
+static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
+{
+ if (bio)
+ blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
+ else {
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt)
+ __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
+ }
+}
+
+static void blk_add_trace_plug(struct request_queue *q)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt)
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+}
+
+static void blk_add_trace_unplug_io(struct request_queue *q)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt) {
+ unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+ __be64 rpdu = cpu_to_be64(pdu);
+
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
+ sizeof(rpdu), &rpdu);
+ }
+}
+
+static void blk_add_trace_unplug_timer(struct request_queue *q)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt) {
+ unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
+ __be64 rpdu = cpu_to_be64(pdu);
+
+ __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
+ sizeof(rpdu), &rpdu);
+ }
+}
+
+static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
+ unsigned int pdu)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (bt) {
+ __be64 rpdu = cpu_to_be64(pdu);
+
+ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+ BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
+ sizeof(rpdu), &rpdu);
+ }
+}
+
+/**
+ * blk_add_trace_remap - Add a trace for a remap operation
+ * @q: queue the io is for
+ * @bio: the source bio
+ * @dev: target device
+ * @from: source sector
+ * @to: target sector
+ *
+ * Description:
+ * Device mapper or raid target sometimes need to split a bio because
+ * it spans a stripe (or similar). Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
+ dev_t dev, sector_t from, sector_t to)
+{
+ struct blk_trace *bt = q->blk_trace;
+ struct blk_io_trace_remap r;
+
+ if (likely(!bt))
+ return;
+
+ r.device = cpu_to_be32(dev);
+ r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
+ r.sector = cpu_to_be64(to);
+
+ __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
+ !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+}
+
+/**
+ * blk_add_driver_data - Add binary message with driver-specific data
+ * @q: queue the io is for
+ * @rq: io request
+ * @data: driver-specific data
+ * @len: length of driver-specific data
+ *
+ * Description:
+ * Some drivers might want to write driver-specific data per request.
+ *
+ **/
+void blk_add_driver_data(struct request_queue *q,
+ struct request *rq,
+ void *data, size_t len)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (likely(!bt))
+ return;
+
+ if (blk_pc_request(rq))
+ __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
+ rq->errors, len, data);
+ else
+ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+ 0, BLK_TA_DRV_DATA, rq->errors, len, data);
+}
+EXPORT_SYMBOL_GPL(blk_add_driver_data);
+
+static int blk_register_tracepoints(void)
+{
+ int ret;
+
+ ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+ WARN_ON(ret);
+ ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+ WARN_ON(ret);
+ ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
+ WARN_ON(ret);
+ ret = register_trace_block_getrq(blk_add_trace_getrq);
+ WARN_ON(ret);
+ ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
+ WARN_ON(ret);
+ ret = register_trace_block_plug(blk_add_trace_plug);
+ WARN_ON(ret);
+ ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+ WARN_ON(ret);
+ ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
+ WARN_ON(ret);
+ ret = register_trace_block_split(blk_add_trace_split);
+ WARN_ON(ret);
+ ret = register_trace_block_remap(blk_add_trace_remap);
+ WARN_ON(ret);
+ return 0;
+}
+
+static void blk_unregister_tracepoints(void)
+{
+ unregister_trace_block_remap(blk_add_trace_remap);
+ unregister_trace_block_split(blk_add_trace_split);
+ unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
+ unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+ unregister_trace_block_plug(blk_add_trace_plug);
+ unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
+ unregister_trace_block_getrq(blk_add_trace_getrq);
+ unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
+ unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+ unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+ unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
+ unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+ unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
+ unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+ unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
+ unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
+ unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
+
+ tracepoint_synchronize_unregister();
+}
diff --git a/block/bsg.c b/block/bsg.c
index e8bd2475682a..e73e50daf3d0 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -202,6 +202,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
rq->timeout = q->sg_timeout;
if (!rq->timeout)
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
+ if (rq->timeout < BLK_MIN_SG_TIMEOUT)
+ rq->timeout = BLK_MIN_SG_TIMEOUT;
return 0;
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a062eebbd15..e8525fa72823 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1136,12 +1136,8 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
if (cfq_class_idle(cfqq))
max_dispatch = 1;
- if (cfqq->dispatched >= max_dispatch) {
- if (cfqd->busy_queues > 1)
- break;
- if (cfqq->dispatched >= 4 * max_dispatch)
- break;
- }
+ if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1)
+ break;
if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
break;
@@ -1318,7 +1314,15 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __cfq_exit_single_io_context(cfqd, cic);
+
+ /*
+ * Ensure we get a fresh copy of the ->key to prevent
+ * race between exiting task and queue
+ */
+ smp_read_barrier_depends();
+ if (cic->key)
+ __cfq_exit_single_io_context(cfqd, cic);
+
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
@@ -2160,7 +2164,7 @@ out_cont:
static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
{
del_timer_sync(&cfqd->idle_slice_timer);
- kblockd_flush_work(&cfqd->unplug_work);
+ cancel_work_sync(&cfqd->unplug_work);
}
static void cfq_put_async_queues(struct cfq_data *cfqd)
@@ -2178,7 +2182,7 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
cfq_put_queue(cfqd->async_idle_cfqq);
}
-static void cfq_exit_queue(elevator_t *e)
+static void cfq_exit_queue(struct elevator_queue *e)
{
struct cfq_data *cfqd = e->elevator_data;
struct request_queue *q = cfqd->queue;
@@ -2288,7 +2292,7 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data = __VAR; \
@@ -2308,7 +2312,7 @@ SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data; \
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 3d3e7a46f38c..f87615dea46b 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -677,6 +677,29 @@ static int compat_blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
case DVD_WRITE_STRUCT:
case DVD_AUTH:
arg = (unsigned long)compat_ptr(arg);
+ /* These intepret arg as an unsigned long, not as a pointer,
+ * so we must not do compat_ptr() conversion. */
+ case HDIO_SET_MULTCOUNT:
+ case HDIO_SET_UNMASKINTR:
+ case HDIO_SET_KEEPSETTINGS:
+ case HDIO_SET_32BIT:
+ case HDIO_SET_NOWERR:
+ case HDIO_SET_DMA:
+ case HDIO_SET_PIO_MODE:
+ case HDIO_SET_NICE:
+ case HDIO_SET_WCACHE:
+ case HDIO_SET_ACOUSTIC:
+ case HDIO_SET_BUSSTATE:
+ case HDIO_SET_ADDRESS:
+ case CDROMEJECT_SW:
+ case CDROM_SET_OPTIONS:
+ case CDROM_CLEAR_OPTIONS:
+ case CDROM_SELECT_SPEED:
+ case CDROM_SELECT_DISC:
+ case CDROM_MEDIA_CHANGED:
+ case CDROM_DRIVE_STATUS:
+ case CDROM_LOCKDOOR:
+ case CDROM_DEBUG:
break;
default:
/* unknown ioctl number */
@@ -699,8 +722,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
struct backing_dev_info *bdi;
loff_t size;
+ /*
+ * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+ * to updated it before every ioctl.
+ */
if (file->f_flags & O_NDELAY)
- mode |= FMODE_NDELAY_NOW;
+ mode |= FMODE_NDELAY;
+ else
+ mode &= ~FMODE_NDELAY;
switch (cmd) {
case HDIO_GETGEO:
@@ -745,9 +774,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
return -ENOTTY;
- lock_kernel();
bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
- unlock_kernel();
return 0;
case BLKGETSIZE:
size = bdev->bd_inode->i_size;
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index fd311179f44c..c4d991d4adef 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -334,7 +334,7 @@ static int deadline_queue_empty(struct request_queue *q)
&& list_empty(&dd->fifo_list[READ]);
}
-static void deadline_exit_queue(elevator_t *e)
+static void deadline_exit_queue(struct elevator_queue *e)
{
struct deadline_data *dd = e->elevator_data;
@@ -387,7 +387,7 @@ deadline_var_store(int *var, const char *page, size_t count)
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct deadline_data *dd = e->elevator_data; \
int __data = __VAR; \
@@ -403,7 +403,7 @@ SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct deadline_data *dd = e->elevator_data; \
int __data; \
diff --git a/block/elevator.c b/block/elevator.c
index 9ac82dde99dd..98259eda0ef6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,6 +33,7 @@
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
+#include <trace/block.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
@@ -41,6 +42,8 @@
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
+DEFINE_TRACE(block_rq_abort);
+
/*
* Merge hash stuff.
*/
@@ -52,6 +55,9 @@ static const int elv_hash_shift = 6;
#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
+DEFINE_TRACE(block_rq_insert);
+DEFINE_TRACE(block_rq_issue);
+
/*
* Query io scheduler to see if the current process issuing bio may be
* merged with rq.
@@ -59,7 +65,7 @@ static const int elv_hash_shift = 6;
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
struct request_queue *q = rq->q;
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_allow_merge_fn)
return e->ops->elevator_allow_merge_fn(q, rq, bio);
@@ -202,13 +208,13 @@ __setup("elevator=", elevator_setup);
static struct kobj_type elv_ktype;
-static elevator_t *elevator_alloc(struct request_queue *q,
+static struct elevator_queue *elevator_alloc(struct request_queue *q,
struct elevator_type *e)
{
- elevator_t *eq;
+ struct elevator_queue *eq;
int i;
- eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
+ eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
if (unlikely(!eq))
goto err;
@@ -234,8 +240,9 @@ err:
static void elevator_release(struct kobject *kobj)
{
- elevator_t *e = container_of(kobj, elevator_t, kobj);
+ struct elevator_queue *e;
+ e = container_of(kobj, struct elevator_queue, kobj);
elevator_put(e->elevator_type);
kfree(e->hash);
kfree(e);
@@ -291,7 +298,7 @@ int elevator_init(struct request_queue *q, char *name)
}
EXPORT_SYMBOL(elevator_init);
-void elevator_exit(elevator_t *e)
+void elevator_exit(struct elevator_queue *e)
{
mutex_lock(&e->sysfs_lock);
if (e->ops->elevator_exit_fn)
@@ -305,7 +312,7 @@ EXPORT_SYMBOL(elevator_exit);
static void elv_activate_rq(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_activate_req_fn)
e->ops->elevator_activate_req_fn(q, rq);
@@ -313,7 +320,7 @@ static void elv_activate_rq(struct request_queue *q, struct request *rq)
static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_deactivate_req_fn)
e->ops->elevator_deactivate_req_fn(q, rq);
@@ -332,7 +339,7 @@ static void elv_rqhash_del(struct request_queue *q, struct request *rq)
static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
BUG_ON(ELV_ON_HASH(rq));
hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
@@ -346,7 +353,7 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
struct hlist_node *entry, *next;
struct request *rq;
@@ -488,7 +495,7 @@ EXPORT_SYMBOL(elv_dispatch_add_tail);
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
struct request *__rq;
int ret;
@@ -523,7 +530,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_merged_fn)
e->ops->elevator_merged_fn(q, rq, type);
@@ -537,7 +544,7 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
void elv_merge_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_merge_req_fn)
e->ops->elevator_merge_req_fn(q, rq, next);
@@ -586,7 +593,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
unsigned ordseq;
int unplug_it = 1;
- blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+ trace_block_rq_insert(q, rq);
rq->q = q;
@@ -749,14 +756,6 @@ struct request *elv_next_request(struct request_queue *q)
int ret;
while ((rq = __elv_next_request(q)) != NULL) {
- /*
- * Kill the empty barrier place holder, the driver must
- * not ever see it.
- */
- if (blk_empty_barrier(rq)) {
- __blk_end_request(rq, 0, blk_rq_bytes(rq));
- continue;
- }
if (!(rq->cmd_flags & REQ_STARTED)) {
/*
* This is the first time the device driver
@@ -772,7 +771,7 @@ struct request *elv_next_request(struct request_queue *q)
* not be passed by new incoming requests
*/
rq->cmd_flags |= REQ_STARTED;
- blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+ trace_block_rq_issue(q, rq);
}
if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -844,18 +843,11 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
*/
if (blk_account_rq(rq))
q->in_flight++;
-
- /*
- * We are now handing the request to the hardware, add the
- * timeout handler.
- */
- blk_add_timer(rq);
}
-EXPORT_SYMBOL(elv_dequeue_request);
int elv_queue_empty(struct request_queue *q)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (!list_empty(&q->queue_head))
return 0;
@@ -869,7 +861,7 @@ EXPORT_SYMBOL(elv_queue_empty);
struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_latter_req_fn)
return e->ops->elevator_latter_req_fn(q, rq);
@@ -878,7 +870,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_former_req_fn)
return e->ops->elevator_former_req_fn(q, rq);
@@ -887,7 +879,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_set_req_fn)
return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
@@ -898,7 +890,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
void elv_put_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_put_req_fn)
e->ops->elevator_put_req_fn(rq);
@@ -906,7 +898,7 @@ void elv_put_request(struct request_queue *q, struct request *rq)
int elv_may_queue(struct request_queue *q, int rw)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
if (e->ops->elevator_may_queue_fn)
return e->ops->elevator_may_queue_fn(q, rw);
@@ -921,7 +913,7 @@ void elv_abort_queue(struct request_queue *q)
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
rq->cmd_flags |= REQ_QUIET;
- blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+ trace_block_rq_abort(q, rq);
__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
}
}
@@ -929,7 +921,7 @@ EXPORT_SYMBOL(elv_abort_queue);
void elv_completed_request(struct request_queue *q, struct request *rq)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
/*
* request is released from the driver, io must be done
@@ -945,10 +937,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
* drained for flush sequence.
*/
if (unlikely(q->ordseq)) {
- struct request *first_rq = list_entry_rq(q->queue_head.next);
- if (q->in_flight == 0 &&
+ struct request *next = NULL;
+
+ if (!list_empty(&q->queue_head))
+ next = list_entry_rq(q->queue_head.next);
+
+ if (!q->in_flight &&
blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
- blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+ (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
blk_start_queueing(q);
}
@@ -960,13 +956,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
- elevator_t *e = container_of(kobj, elevator_t, kobj);
struct elv_fs_entry *entry = to_elv(attr);
+ struct elevator_queue *e;
ssize_t error;
if (!entry->show)
return -EIO;
+ e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
error = e->ops ? entry->show(e, page) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
@@ -977,13 +974,14 @@ static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
- elevator_t *e = container_of(kobj, elevator_t, kobj);
struct elv_fs_entry *entry = to_elv(attr);
+ struct elevator_queue *e;
ssize_t error;
if (!entry->store)
return -EIO;
+ e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
error = e->ops ? entry->store(e, page, length) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
@@ -1002,7 +1000,7 @@ static struct kobj_type elv_ktype = {
int elv_register_queue(struct request_queue *q)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
int error;
error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
@@ -1020,7 +1018,7 @@ int elv_register_queue(struct request_queue *q)
return error;
}
-static void __elv_unregister_queue(elevator_t *e)
+static void __elv_unregister_queue(struct elevator_queue *e)
{
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
@@ -1083,7 +1081,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
*/
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
- elevator_t *old_elevator, *e;
+ struct elevator_queue *old_elevator, *e;
void *data;
/*
@@ -1189,7 +1187,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
- elevator_t *e = q->elevator;
+ struct elevator_queue *e = q->elevator;
struct elevator_type *elv = e->elevator_type;
struct elevator_type *__e;
int len = 0;
diff --git a/block/genhd.c b/block/genhd.c
index 27549e470da5..d84a7df1e2a0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
}
EXPORT_SYMBOL_GPL(disk_part_iter_exit);
+static inline int sector_in_part(struct hd_struct *part, sector_t sector)
+{
+ return part->start_sect <= sector &&
+ sector < part->start_sect + part->nr_sects;
+}
+
/**
* disk_map_sector_rcu - map sector to partition
* @disk: gendisk of interest
@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
{
struct disk_part_tbl *ptbl;
+ struct hd_struct *part;
int i;
ptbl = rcu_dereference(disk->part_tbl);
+ part = rcu_dereference(ptbl->last_lookup);
+ if (part && sector_in_part(part, sector))
+ return part;
+
for (i = 1; i < ptbl->len; i++) {
- struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+ part = rcu_dereference(ptbl->part[i]);
- if (part && part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects)
+ if (part && sector_in_part(part, sector)) {
+ rcu_assign_pointer(ptbl->last_lookup, part);
return part;
+ }
}
return &disk->part0;
}
@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk,
struct disk_part_tbl *old_ptbl = disk->part_tbl;
rcu_assign_pointer(disk->part_tbl, new_ptbl);
- if (old_ptbl)
+
+ if (old_ptbl) {
+ rcu_assign_pointer(old_ptbl->last_lookup, NULL);
call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+ }
}
/**
@@ -1102,6 +1117,7 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
kfree(disk);
return NULL;
}
+ disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) {
free_part_stats(&disk->part0);
kfree(disk);
@@ -1116,7 +1132,6 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
device_initialize(disk_to_dev(disk));
INIT_WORK(&disk->async_notify,
media_change_notify_thread);
- disk->node_id = node_id;
}
return disk;
}
diff --git a/block/ioctl.c b/block/ioctl.c
index d03985b04d67..0f22e629b13c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -323,9 +323,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
return -ENOTTY;
- lock_kernel();
bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
- unlock_kernel();
return 0;
case BLKBSZSET:
/* set the logical block size */
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index c23e02969650..3a0d369d08c7 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -76,7 +76,7 @@ static void *noop_init_queue(struct request_queue *q)
return nd;
}
-static void noop_exit_queue(elevator_t *e)
+static void noop_exit_queue(struct elevator_queue *e)
{
struct noop_data *nd = e->elevator_data;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 5963cf91a3a0..ee9c67d7e1be 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -60,7 +60,7 @@ static int scsi_get_bus(struct request_queue *q, int __user *p)
static int sg_get_timeout(struct request_queue *q)
{
- return q->sg_timeout / (HZ / USER_HZ);
+ return jiffies_to_clock_t(q->sg_timeout);
}
static int sg_set_timeout(struct request_queue *q, int __user *p)
@@ -68,7 +68,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
int timeout, err = get_user(timeout, p);
if (!err)
- q->sg_timeout = timeout * (HZ / USER_HZ);
+ q->sg_timeout = clock_t_to_jiffies(timeout);
return err;
}
@@ -208,6 +208,8 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
rq->timeout = q->sg_timeout;
if (!rq->timeout)
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
+ if (rq->timeout < BLK_MIN_SG_TIMEOUT)
+ rq->timeout = BLK_MIN_SG_TIMEOUT;
return 0;
}