summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--block/Kconfig6
-rw-r--r--block/Makefile3
-rw-r--r--block/blk-cgroup.c46
-rw-r--r--block/blk-core.c249
-rw-r--r--block/blk-exec.c8
-rw-r--r--block/blk-ioc.c426
-rw-r--r--block/blk-map.c2
-rw-r--r--block/blk-merge.c37
-rw-r--r--block/blk-settings.c32
-rw-r--r--block/blk-sysfs.c12
-rw-r--r--block/blk-tag.c13
-rw-r--r--block/blk-throttle.c4
-rw-r--r--block/blk.h60
-rw-r--r--block/bsg.c9
-rw-r--r--block/cfq-iosched.c608
-rw-r--r--block/compat_ioctl.c3
-rw-r--r--block/deadline-iosched.c4
-rw-r--r--block/elevator.c272
-rw-r--r--block/genhd.c7
-rw-r--r--block/ioctl.c30
-rw-r--r--block/noop-iosched.c4
-rw-r--r--block/partition-generic.c (renamed from fs/partitions/check.c)152
-rw-r--r--block/partitions/Kconfig (renamed from fs/partitions/Kconfig)0
-rw-r--r--block/partitions/Makefile (renamed from fs/partitions/Makefile)0
-rw-r--r--block/partitions/acorn.c (renamed from fs/partitions/acorn.c)0
-rw-r--r--block/partitions/acorn.h (renamed from fs/partitions/acorn.h)0
-rw-r--r--block/partitions/amiga.c (renamed from fs/partitions/amiga.c)0
-rw-r--r--block/partitions/amiga.h (renamed from fs/partitions/amiga.h)0
-rw-r--r--block/partitions/atari.c (renamed from fs/partitions/atari.c)0
-rw-r--r--block/partitions/atari.h (renamed from fs/partitions/atari.h)0
-rw-r--r--block/partitions/check.c166
-rw-r--r--block/partitions/check.h (renamed from fs/partitions/check.h)3
-rw-r--r--block/partitions/efi.c (renamed from fs/partitions/efi.c)0
-rw-r--r--block/partitions/efi.h (renamed from fs/partitions/efi.h)0
-rw-r--r--block/partitions/ibm.c (renamed from fs/partitions/ibm.c)0
-rw-r--r--block/partitions/ibm.h (renamed from fs/partitions/ibm.h)0
-rw-r--r--block/partitions/karma.c (renamed from fs/partitions/karma.c)0
-rw-r--r--block/partitions/karma.h (renamed from fs/partitions/karma.h)0
-rw-r--r--block/partitions/ldm.c (renamed from fs/partitions/ldm.c)11
-rw-r--r--block/partitions/ldm.h (renamed from fs/partitions/ldm.h)0
-rw-r--r--block/partitions/mac.c (renamed from fs/partitions/mac.c)0
-rw-r--r--block/partitions/mac.h (renamed from fs/partitions/mac.h)0
-rw-r--r--block/partitions/msdos.c (renamed from fs/partitions/msdos.c)0
-rw-r--r--block/partitions/msdos.h (renamed from fs/partitions/msdos.h)0
-rw-r--r--block/partitions/osf.c (renamed from fs/partitions/osf.c)0
-rw-r--r--block/partitions/osf.h (renamed from fs/partitions/osf.h)0
-rw-r--r--block/partitions/sgi.c (renamed from fs/partitions/sgi.c)0
-rw-r--r--block/partitions/sgi.h (renamed from fs/partitions/sgi.h)0
-rw-r--r--block/partitions/sun.c (renamed from fs/partitions/sun.c)0
-rw-r--r--block/partitions/sun.h (renamed from fs/partitions/sun.h)0
-rw-r--r--block/partitions/sysv68.c (renamed from fs/partitions/sysv68.c)0
-rw-r--r--block/partitions/sysv68.h (renamed from fs/partitions/sysv68.h)0
-rw-r--r--block/partitions/ultrix.c (renamed from fs/partitions/ultrix.c)0
-rw-r--r--block/partitions/ultrix.h (renamed from fs/partitions/ultrix.h)0
-rw-r--r--block/scsi_ioctl.c52
55 files changed, 1144 insertions, 1075 deletions
diff --git a/block/Kconfig b/block/Kconfig
index e97934eececa..09acf1b39905 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -99,6 +99,12 @@ config BLK_DEV_THROTTLING
See Documentation/cgroups/blkio-controller.txt for more information.
+menu "Partition Types"
+
+source "block/partitions/Kconfig"
+
+endmenu
+
endif # BLOCK
config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 514c6e4f427a..39b76ba66ffd 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
+ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \
+ partition-generic.o partitions/
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8f630cec906e..75642a352a8f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -30,8 +30,10 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup);
static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
struct cgroup *);
-static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *);
-static void blkiocg_attach_task(struct cgroup *, struct task_struct *);
+static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+ struct cgroup_taskset *);
+static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+ struct cgroup_taskset *);
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
@@ -44,8 +46,8 @@ static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
struct cgroup_subsys blkio_subsys = {
.name = "blkio",
.create = blkiocg_create,
- .can_attach_task = blkiocg_can_attach_task,
- .attach_task = blkiocg_attach_task,
+ .can_attach = blkiocg_can_attach,
+ .attach = blkiocg_attach,
.destroy = blkiocg_destroy,
.populate = blkiocg_populate,
#ifdef CONFIG_BLK_CGROUP
@@ -1626,30 +1628,40 @@ done:
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
-static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup_taskset *tset)
{
+ struct task_struct *task;
struct io_context *ioc;
int ret = 0;
/* task_lock() is needed to avoid races with exit_io_context() */
- task_lock(tsk);
- ioc = tsk->io_context;
- if (ioc && atomic_read(&ioc->nr_tasks) > 1)
- ret = -EINVAL;
- task_unlock(tsk);
-
+ cgroup_taskset_for_each(task, cgrp, tset) {
+ task_lock(task);
+ ioc = task->io_context;
+ if (ioc && atomic_read(&ioc->nr_tasks) > 1)
+ ret = -EINVAL;
+ task_unlock(task);
+ if (ret)
+ break;
+ }
return ret;
}
-static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup_taskset *tset)
{
+ struct task_struct *task;
struct io_context *ioc;
- task_lock(tsk);
- ioc = tsk->io_context;
- if (ioc)
- ioc->cgroup_changed = 1;
- task_unlock(tsk);
+ cgroup_taskset_for_each(task, cgrp, tset) {
+ /* we don't lose anything even if ioc allocation fails */
+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
+ if (ioc) {
+ ioc_cgroup_changed(ioc);
+ put_io_context(ioc);
+ }
+ }
}
void blkio_policy_register(struct blkio_policy_type *blkiop)
diff --git a/block/blk-core.c b/block/blk-core.c
index ea70e6c80cd3..3a78b00edd71 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+DEFINE_IDA(blk_queue_ida);
+
/*
* For the allocated request tables
*/
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
void blk_drain_queue(struct request_queue *q, bool drain_all)
{
while (true) {
- int nr_rqs;
+ bool drain = false;
+ int i;
spin_lock_irq(q->queue_lock);
@@ -366,16 +369,34 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (drain_all)
blk_throtl_drain(q);
- __blk_run_queue(q);
+ /*
+ * This function might be called on a queue which failed
+ * driver init after queue creation. Some drivers
+ * (e.g. fd) get unhappy in such cases. Kick queue iff
+ * dispatch queue has something on it.
+ */
+ if (!list_empty(&q->queue_head))
+ __blk_run_queue(q);
- if (drain_all)
- nr_rqs = q->rq.count[0] + q->rq.count[1];
- else
- nr_rqs = q->rq.elvpriv;
+ drain |= q->rq.elvpriv;
+
+ /*
+ * Unfortunately, requests are queued at and tracked from
+ * multiple places and there's no single counter which can
+ * be drained. Check all the queues and counters.
+ */
+ if (drain_all) {
+ drain |= !list_empty(&q->queue_head);
+ for (i = 0; i < 2; i++) {
+ drain |= q->rq.count[i];
+ drain |= q->in_flight[i];
+ drain |= !list_empty(&q->flush_queue[i]);
+ }
+ }
spin_unlock_irq(q->queue_lock);
- if (!nr_rqs)
+ if (!drain)
break;
msleep(10);
}
@@ -462,27 +483,29 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (!q)
return NULL;
+ q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+ if (q->id < 0)
+ goto fail_q;
+
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
q->backing_dev_info.state = 0;
q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
q->backing_dev_info.name = "block";
+ q->node = node_id;
err = bdi_init(&q->backing_dev_info);
- if (err) {
- kmem_cache_free(blk_requestq_cachep, q);
- return NULL;
- }
+ if (err)
+ goto fail_id;
- if (blk_throtl_init(q)) {
- kmem_cache_free(blk_requestq_cachep, q);
- return NULL;
- }
+ if (blk_throtl_init(q))
+ goto fail_id;
setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
laptop_mode_timer_fn, (unsigned long) q);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_LIST_HEAD(&q->timeout_list);
+ INIT_LIST_HEAD(&q->icq_list);
INIT_LIST_HEAD(&q->flush_queue[0]);
INIT_LIST_HEAD(&q->flush_queue[1]);
INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -500,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->queue_lock = &q->__queue_lock;
return q;
+
+fail_id:
+ ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+ kmem_cache_free(blk_requestq_cachep, q);
+ return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);
@@ -551,7 +580,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
if (!uninit_q)
return NULL;
- q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
+ q = blk_init_allocated_queue(uninit_q, rfn, lock);
if (!q)
blk_cleanup_queue(uninit_q);
@@ -563,18 +592,9 @@ struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
spinlock_t *lock)
{
- return blk_init_allocated_queue_node(q, rfn, lock, -1);
-}
-EXPORT_SYMBOL(blk_init_allocated_queue);
-
-struct request_queue *
-blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
- spinlock_t *lock, int node_id)
-{
if (!q)
return NULL;
- q->node = node_id;
if (blk_init_free_list(q))
return NULL;
@@ -604,28 +624,33 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
return NULL;
}
-EXPORT_SYMBOL(blk_init_allocated_queue_node);
+EXPORT_SYMBOL(blk_init_allocated_queue);
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
{
- if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
- kobject_get(&q->kobj);
- return 0;
+ if (likely(!blk_queue_dead(q))) {
+ __blk_get_queue(q);
+ return true;
}
- return 1;
+ return false;
}
EXPORT_SYMBOL(blk_get_queue);
static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
- if (rq->cmd_flags & REQ_ELVPRIV)
+ if (rq->cmd_flags & REQ_ELVPRIV) {
elv_put_request(q, rq);
+ if (rq->elv.icq)
+ put_io_context(rq->elv.icq->ioc);
+ }
+
mempool_free(rq, q->rq.rq_pool);
}
static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+ unsigned int flags, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -636,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
rq->cmd_flags = flags | REQ_ALLOCED;
- if ((flags & REQ_ELVPRIV) &&
- unlikely(elv_set_request(q, rq, gfp_mask))) {
- mempool_free(rq, q->rq.rq_pool);
- return NULL;
+ if (flags & REQ_ELVPRIV) {
+ rq->elv.icq = icq;
+ if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+ mempool_free(rq, q->rq.rq_pool);
+ return NULL;
+ }
+ /* @rq->elv.icq holds on to io_context until @rq is freed */
+ if (icq)
+ get_io_context(icq->ioc);
}
return rq;
@@ -751,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
{
struct request *rq = NULL;
struct request_list *rl = &q->rq;
- struct io_context *ioc = NULL;
+ struct elevator_type *et;
+ struct io_context *ioc;
+ struct io_cq *icq = NULL;
const bool is_sync = rw_is_sync(rw_flags) != 0;
+ bool retried = false;
int may_queue;
+retry:
+ et = q->elevator->type;
+ ioc = current->io_context;
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ if (unlikely(blk_queue_dead(q)))
return NULL;
may_queue = elv_may_queue(q, rw_flags);
@@ -764,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
if (rl->count[is_sync]+1 >= q->nr_requests) {
- ioc = current_io_context(GFP_ATOMIC, q->node);
+ /*
+ * We want ioc to record batching state. If it's
+ * not already there, creating a new one requires
+ * dropping queue_lock, which in turn requires
+ * retesting conditions to avoid queue hang.
+ */
+ if (!ioc && !retried) {
+ spin_unlock_irq(q->queue_lock);
+ create_io_context(current, gfp_mask, q->node);
+ spin_lock_irq(q->queue_lock);
+ retried = true;
+ goto retry;
+ }
+
/*
* The queue will fill after this allocation, so set
* it as full, and mark this process as "batching".
@@ -800,17 +849,38 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
rl->count[is_sync]++;
rl->starved[is_sync] = 0;
+ /*
+ * Decide whether the new request will be managed by elevator. If
+ * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will
+ * prevent the current elevator from being destroyed until the new
+ * request is freed. This guarantees icq's won't be destroyed and
+ * makes creating new ones safe.
+ *
+ * Also, lookup icq while holding queue_lock. If it doesn't exist,
+ * it will be created after releasing queue_lock.
+ */
if (blk_rq_should_init_elevator(bio) &&
!test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
rw_flags |= REQ_ELVPRIV;
rl->elvpriv++;
+ if (et->icq_cache && ioc)
+ icq = ioc_lookup_icq(ioc, q);
}
if (blk_queue_io_stat(q))
rw_flags |= REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);
- rq = blk_alloc_request(q, rw_flags, gfp_mask);
+ /* create icq if missing */
+ if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
+ icq = ioc_create_icq(q, gfp_mask);
+ if (!icq)
+ goto fail_icq;
+ }
+
+ rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
+fail_icq:
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
@@ -872,10 +942,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
rq = get_request(q, rw_flags, bio, GFP_NOIO);
while (!rq) {
DEFINE_WAIT(wait);
- struct io_context *ioc;
struct request_list *rl = &q->rq;
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ if (unlikely(blk_queue_dead(q)))
return NULL;
prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@ -892,8 +961,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
* up to a big batch of them for a small period time.
* See ioc_batching, ioc_set_batching
*/
- ioc = current_io_context(GFP_NOIO, q->node);
- ioc_set_batching(q, ioc);
+ create_io_context(current, GFP_NOIO, q->node);
+ ioc_set_batching(q, current->io_context);
spin_lock_irq(q->queue_lock);
finish_wait(&rl->wait[is_sync], &wait);
@@ -1010,54 +1079,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
__elv_add_request(q, rq, where);
}
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q: request queue where request should be inserted
- * @rq: request to be inserted
- * @at_head: insert request at head or tail of queue
- * @data: private data
- *
- * Description:
- * Many block devices need to execute commands asynchronously, so they don't
- * block the whole kernel from preemption during request execution. This is
- * accomplished normally by inserting aritficial requests tagged as
- * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- * be scheduled for actual execution by the request queue.
- *
- * We have the option of inserting the head or the tail of the queue.
- * Typically we use the tail for new ioctls and so forth. We use the head
- * of the queue for things like a QUEUE_FULL message from a device, or a
- * host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
- int at_head, void *data)
-{
- int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
- unsigned long flags;
-
- /*
- * tell I/O scheduler that this isn't a regular read/write (ie it
- * must not attempt merges on this) and that it acts as a soft
- * barrier
- */
- rq->cmd_type = REQ_TYPE_SPECIAL;
-
- rq->special = data;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- /*
- * If command is tagged, release the tag
- */
- if (blk_rq_tagged(rq))
- blk_queue_end_tag(q, rq);
-
- add_acct_request(q, rq, where);
- __blk_run_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1191,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
return true;
}
@@ -1222,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
return true;
}
@@ -1236,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
* on %current's plugged list. Returns %true if merge was successful,
* otherwise %false.
*
- * This function is called without @q->queue_lock; however, elevator is
- * accessed iff there already are requests on the plugged list which in
- * turn guarantees validity of the elevator.
- *
- * Note that, on successful merge, elevator operation
- * elevator_bio_merged_fn() will be called without queue lock. Elevator
- * must be ready for this.
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock. As such it's more of an issuing mechanism
+ * than scheduling, and the request, while may have elvpriv data, is not
+ * added on the elevator at this point. In addition, we don't have
+ * reliable access to the elevator outside queue lock. Only check basic
+ * merging parameters without querying the elevator.
*/
static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
@@ -1261,10 +1279,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
(*request_count)++;
- if (rq->q != q)
+ if (rq->q != q || !blk_rq_merge_ok(rq, bio))
continue;
- el_ret = elv_try_merge(rq, bio);
+ el_ret = blk_try_merge(rq, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
ret = bio_attempt_back_merge(q, rq, bio);
if (ret)
@@ -1326,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
+ elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
+ elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
@@ -1767,6 +1787,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
return -EIO;
spin_lock_irqsave(q->queue_lock, flags);
+ if (unlikely(blk_queue_dead(q))) {
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ return -ENODEV;
+ }
/*
* Submitting request must be dequeued before calling this function
@@ -2741,6 +2765,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
trace_block_unplug(q, depth, !from_schedule);
/*
+ * Don't mess with dead queue.
+ */
+ if (unlikely(blk_queue_dead(q))) {
+ spin_unlock(q->queue_lock);
+ return;
+ }
+
+ /*
* If we are punting this to kblockd, then we can safely drop
* the queue_lock before waking kblockd (which needs to take
* this lock).
@@ -2816,6 +2848,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
depth = 0;
spin_lock(q->queue_lock);
}
+
+ /*
+ * Short-circuit if @q is dead
+ */
+ if (unlikely(blk_queue_dead(q))) {
+ __blk_end_request_all(rq, -ENODEV);
+ continue;
+ }
+
/*
* rq is already accounted, so use raw insert
*/
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a1ebceb332f9..fb2cbd551621 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,7 +50,11 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
{
int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+ WARN_ON(irqs_disabled());
+ spin_lock_irq(q->queue_lock);
+
+ if (unlikely(blk_queue_dead(q))) {
+ spin_unlock_irq(q->queue_lock);
rq->errors = -ENXIO;
if (rq->end_io)
rq->end_io(rq, rq->errors);
@@ -59,8 +63,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
rq->rq_disk = bd_disk;
rq->end_io = done;
- WARN_ON(irqs_disabled());
- spin_lock_irq(q->queue_lock);
__elv_add_request(q, rq, where);
__blk_run_queue(q);
/* the queue is stopped so it won't be run */
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 6f9bbd978653..8b782a63c297 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -16,53 +16,153 @@
*/
static struct kmem_cache *iocontext_cachep;
-static void cfq_dtor(struct io_context *ioc)
+/**
+ * get_io_context - increment reference count to io_context
+ * @ioc: io_context to get
+ *
+ * Increment reference count to @ioc.
+ */
+void get_io_context(struct io_context *ioc)
{
- if (!hlist_empty(&ioc->cic_list)) {
- struct cfq_io_context *cic;
+ BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
+ atomic_long_inc(&ioc->refcount);
+}
+EXPORT_SYMBOL(get_io_context);
- cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
- cic_list);
- cic->dtor(ioc);
- }
+static void icq_free_icq_rcu(struct rcu_head *head)
+{
+ struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
+
+ kmem_cache_free(icq->__rcu_icq_cache, icq);
}
/*
- * IO Context helper functions. put_io_context() returns 1 if there are no
- * more users of this io context, 0 otherwise.
+ * Exit and free an icq. Called with both ioc and q locked.
*/
-int put_io_context(struct io_context *ioc)
+static void ioc_exit_icq(struct io_cq *icq)
{
- if (ioc == NULL)
- return 1;
+ struct io_context *ioc = icq->ioc;
+ struct request_queue *q = icq->q;
+ struct elevator_type *et = q->elevator->type;
- BUG_ON(atomic_long_read(&ioc->refcount) == 0);
+ lockdep_assert_held(&ioc->lock);
+ lockdep_assert_held(q->queue_lock);
- if (atomic_long_dec_and_test(&ioc->refcount)) {
- rcu_read_lock();
- cfq_dtor(ioc);
- rcu_read_unlock();
+ radix_tree_delete(&ioc->icq_tree, icq->q->id);
+ hlist_del_init(&icq->ioc_node);
+ list_del_init(&icq->q_node);
- kmem_cache_free(iocontext_cachep, ioc);
- return 1;
+ /*
+ * Both setting lookup hint to and clearing it from @icq are done
+ * under queue_lock. If it's not pointing to @icq now, it never
+ * will. Hint assignment itself can race safely.
+ */
+ if (rcu_dereference_raw(ioc->icq_hint) == icq)
+ rcu_assign_pointer(ioc->icq_hint, NULL);
+
+ if (et->ops.elevator_exit_icq_fn)
+ et->ops.elevator_exit_icq_fn(icq);
+
+ /*
+ * @icq->q might have gone away by the time RCU callback runs
+ * making it impossible to determine icq_cache. Record it in @icq.
+ */
+ icq->__rcu_icq_cache = et->icq_cache;
+ call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
+}
+
+/*
+ * Slow path for ioc release in put_io_context(). Performs double-lock
+ * dancing to unlink all icq's and then frees ioc.
+ */
+static void ioc_release_fn(struct work_struct *work)
+{
+ struct io_context *ioc = container_of(work, struct io_context,
+ release_work);
+ struct request_queue *last_q = NULL;
+ unsigned long flags;
+
+ /*
+ * Exiting icq may call into put_io_context() through elevator
+ * which will trigger lockdep warning. The ioc's are guaranteed to
+ * be different, use a different locking subclass here. Use
+ * irqsave variant as there's no spin_lock_irq_nested().
+ */
+ spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+
+ while (!hlist_empty(&ioc->icq_list)) {
+ struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+ struct io_cq, ioc_node);
+ struct request_queue *this_q = icq->q;
+
+ if (this_q != last_q) {
+ /*
+ * Need to switch to @this_q. Once we release
+ * @ioc->lock, it can go away along with @cic.
+ * Hold on to it.
+ */
+ __blk_get_queue(this_q);
+
+ /*
+ * blk_put_queue() might sleep thanks to kobject
+ * idiocy. Always release both locks, put and
+ * restart.
+ */
+ if (last_q) {
+ spin_unlock(last_q->queue_lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ blk_put_queue(last_q);
+ } else {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ }
+
+ last_q = this_q;
+ spin_lock_irqsave(this_q->queue_lock, flags);
+ spin_lock_nested(&ioc->lock, 1);
+ continue;
+ }
+ ioc_exit_icq(icq);
}
- return 0;
+
+ if (last_q) {
+ spin_unlock(last_q->queue_lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ blk_put_queue(last_q);
+ } else {
+ spin_unlock_irqrestore(&ioc->lock, flags);
+ }
+
+ kmem_cache_free(iocontext_cachep, ioc);
}
-EXPORT_SYMBOL(put_io_context);
-static void cfq_exit(struct io_context *ioc)
+/**
+ * put_io_context - put a reference of io_context
+ * @ioc: io_context to put
+ *
+ * Decrement reference count of @ioc and release it if the count reaches
+ * zero.
+ */
+void put_io_context(struct io_context *ioc)
{
- rcu_read_lock();
+ unsigned long flags;
- if (!hlist_empty(&ioc->cic_list)) {
- struct cfq_io_context *cic;
+ if (ioc == NULL)
+ return;
+
+ BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
- cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
- cic_list);
- cic->exit(ioc);
+ /*
+ * Releasing ioc requires reverse order double locking and we may
+ * already be holding a queue_lock. Do it asynchronously from wq.
+ */
+ if (atomic_long_dec_and_test(&ioc->refcount)) {
+ spin_lock_irqsave(&ioc->lock, flags);
+ if (!hlist_empty(&ioc->icq_list))
+ schedule_work(&ioc->release_work);
+ spin_unlock_irqrestore(&ioc->lock, flags);
}
- rcu_read_unlock();
}
+EXPORT_SYMBOL(put_io_context);
/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
@@ -74,86 +174,240 @@ void exit_io_context(struct task_struct *task)
task->io_context = NULL;
task_unlock(task);
- if (atomic_dec_and_test(&ioc->nr_tasks))
- cfq_exit(ioc);
-
+ atomic_dec(&ioc->nr_tasks);
put_io_context(ioc);
}
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
+/**
+ * ioc_clear_queue - break any ioc association with the specified queue
+ * @q: request_queue being cleared
+ *
+ * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked.
+ */
+void ioc_clear_queue(struct request_queue *q)
{
- struct io_context *ioc;
+ lockdep_assert_held(q->queue_lock);
+
+ while (!list_empty(&q->icq_list)) {
+ struct io_cq *icq = list_entry(q->icq_list.next,
+ struct io_cq, q_node);
+ struct io_context *ioc = icq->ioc;
- ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
- if (ioc) {
- atomic_long_set(&ioc->refcount, 1);
- atomic_set(&ioc->nr_tasks, 1);
- spin_lock_init(&ioc->lock);
- ioc->ioprio_changed = 0;
- ioc->ioprio = 0;
- ioc->last_waited = 0; /* doesn't matter... */
- ioc->nr_batch_requests = 0; /* because this is 0 */
- INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
- INIT_HLIST_HEAD(&ioc->cic_list);
- ioc->ioc_data = NULL;
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
- ioc->cgroup_changed = 0;
-#endif
+ spin_lock(&ioc->lock);
+ ioc_exit_icq(icq);
+ spin_unlock(&ioc->lock);
}
+}
+
+void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
+ int node)
+{
+ struct io_context *ioc;
- return ioc;
+ ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
+ node);
+ if (unlikely(!ioc))
+ return;
+
+ /* initialize */
+ atomic_long_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
+ spin_lock_init(&ioc->lock);
+ INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
+ INIT_HLIST_HEAD(&ioc->icq_list);
+ INIT_WORK(&ioc->release_work, ioc_release_fn);
+
+ /*
+ * Try to install. ioc shouldn't be installed if someone else
+ * already did or @task, which isn't %current, is exiting. Note
+ * that we need to allow ioc creation on exiting %current as exit
+ * path may issue IOs from e.g. exit_files(). The exit path is
+ * responsible for not issuing IO after exit_io_context().
+ */
+ task_lock(task);
+ if (!task->io_context &&
+ (task == current || !(task->flags & PF_EXITING)))
+ task->io_context = ioc;
+ else
+ kmem_cache_free(iocontext_cachep, ioc);
+ task_unlock(task);
}
-/*
- * If the current task has no IO context then create one and initialise it.
- * Otherwise, return its existing IO context.
+/**
+ * get_task_io_context - get io_context of a task
+ * @task: task of interest
+ * @gfp_flags: allocation flags, used if allocation is necessary
+ * @node: allocation node, used if allocation is necessary
+ *
+ * Return io_context of @task. If it doesn't exist, it is created with
+ * @gfp_flags and @node. The returned io_context has its reference count
+ * incremented.
*
- * This returned IO context doesn't have a specifically elevated refcount,
- * but since the current task itself holds a reference, the context can be
- * used in general code, so long as it stays within `current` context.
+ * This function always goes through task_lock() and it's better to use
+ * %current->io_context + get_io_context() for %current.
*/
-struct io_context *current_io_context(gfp_t gfp_flags, int node)
+struct io_context *get_task_io_context(struct task_struct *task,
+ gfp_t gfp_flags, int node)
{
- struct task_struct *tsk = current;
- struct io_context *ret;
-
- ret = tsk->io_context;
- if (likely(ret))
- return ret;
-
- ret = alloc_io_context(gfp_flags, node);
- if (ret) {
- /* make sure set_task_ioprio() sees the settings above */
- smp_wmb();
- tsk->io_context = ret;
- }
+ struct io_context *ioc;
+
+ might_sleep_if(gfp_flags & __GFP_WAIT);
+
+ do {
+ task_lock(task);
+ ioc = task->io_context;
+ if (likely(ioc)) {
+ get_io_context(ioc);
+ task_unlock(task);
+ return ioc;
+ }
+ task_unlock(task);
+ } while (create_io_context(task, gfp_flags, node));
- return ret;
+ return NULL;
}
+EXPORT_SYMBOL(get_task_io_context);
-/*
- * If the current task has no IO context then create one and initialise it.
- * If it does have a context, take a ref on it.
+/**
+ * ioc_lookup_icq - lookup io_cq from ioc
+ * @ioc: the associated io_context
+ * @q: the associated request_queue
*
- * This is always called in the context of the task which submitted the I/O.
+ * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called
+ * with @q->queue_lock held.
*/
-struct io_context *get_io_context(gfp_t gfp_flags, int node)
+struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
- struct io_context *ioc = NULL;
+ struct io_cq *icq;
+
+ lockdep_assert_held(q->queue_lock);
/*
- * Check for unlikely race with exiting task. ioc ref count is
- * zero when ioc is being detached.
+ * icq's are indexed from @ioc using radix tree and hint pointer,
+ * both of which are protected with RCU. All removals are done
+ * holding both q and ioc locks, and we're holding q lock - if we
+ * find a icq which points to us, it's guaranteed to be valid.
*/
- do {
- ioc = current_io_context(gfp_flags, node);
- if (unlikely(!ioc))
- break;
- } while (!atomic_long_inc_not_zero(&ioc->refcount));
+ rcu_read_lock();
+ icq = rcu_dereference(ioc->icq_hint);
+ if (icq && icq->q == q)
+ goto out;
- return ioc;
+ icq = radix_tree_lookup(&ioc->icq_tree, q->id);
+ if (icq && icq->q == q)
+ rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
+ else
+ icq = NULL;
+out:
+ rcu_read_unlock();
+ return icq;
}
-EXPORT_SYMBOL(get_io_context);
+EXPORT_SYMBOL(ioc_lookup_icq);
+
+/**
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure io_cq linking %current->io_context and @q exists. If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
+ */
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
+{
+ struct elevator_type *et = q->elevator->type;
+ struct io_context *ioc;
+ struct io_cq *icq;
+
+ /* allocate stuff */
+ ioc = create_io_context(current, gfp_mask, q->node);
+ if (!ioc)
+ return NULL;
+
+ icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+ q->node);
+ if (!icq)
+ return NULL;
+
+ if (radix_tree_preload(gfp_mask) < 0) {
+ kmem_cache_free(et->icq_cache, icq);
+ return NULL;
+ }
+
+ icq->ioc = ioc;
+ icq->q = q;
+ INIT_LIST_HEAD(&icq->q_node);
+ INIT_HLIST_NODE(&icq->ioc_node);
+
+ /* lock both q and ioc and try to link @icq */
+ spin_lock_irq(q->queue_lock);
+ spin_lock(&ioc->lock);
+
+ if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
+ hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+ list_add(&icq->q_node, &q->icq_list);
+ if (et->ops.elevator_init_icq_fn)
+ et->ops.elevator_init_icq_fn(icq);
+ } else {
+ kmem_cache_free(et->icq_cache, icq);
+ icq = ioc_lookup_icq(ioc, q);
+ if (!icq)
+ printk(KERN_ERR "cfq: icq link failed!\n");
+ }
+
+ spin_unlock(&ioc->lock);
+ spin_unlock_irq(q->queue_lock);
+ radix_tree_preload_end();
+ return icq;
+}
+
+void ioc_set_changed(struct io_context *ioc, int which)
+{
+ struct io_cq *icq;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
+ set_bit(which, &icq->changed);
+}
+
+/**
+ * ioc_ioprio_changed - notify ioprio change
+ * @ioc: io_context of interest
+ * @ioprio: new ioprio
+ *
+ * @ioc's ioprio has changed to @ioprio. Set %ICQ_IOPRIO_CHANGED for all
+ * icq's. iosched is responsible for checking the bit and applying it on
+ * request issue path.
+ */
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioc->lock, flags);
+ ioc->ioprio = ioprio;
+ ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
+ spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
+/**
+ * ioc_cgroup_changed - notify cgroup change
+ * @ioc: io_context of interest
+ *
+ * @ioc's cgroup has changed. Set %ICQ_CGROUP_CHANGED for all icq's.
+ * iosched is responsible for checking the bit and applying it on request
+ * issue path.
+ */
+void ioc_cgroup_changed(struct io_context *ioc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioc->lock, flags);
+ ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
+ spin_unlock_irqrestore(&ioc->lock, flags);
+}
+EXPORT_SYMBOL(ioc_cgroup_changed);
static int __init blk_ioc_init(void)
{
diff --git a/block/blk-map.c b/block/blk-map.c
index 164cd0059706..623e1cd4cffe 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -311,7 +311,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
if (IS_ERR(bio))
return PTR_ERR(bio);
- if (rq_data_dir(rq) == WRITE)
+ if (!reading)
bio->bi_rw |= REQ_WRITE;
if (do_copy)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index cfcc37cb222b..160035f54882 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{
return attempt_merge(q, rq, next);
}
+
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
+{
+ if (!rq_mergeable(rq))
+ return false;
+
+ /* don't merge file system requests and discard requests */
+ if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
+ return false;
+
+ /* don't merge discard requests and secure discard requests */
+ if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
+ return false;
+
+ /* different data direction or already started, don't merge */
+ if (bio_data_dir(bio) != rq_data_dir(rq))
+ return false;
+
+ /* must be same device and not a special request */
+ if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
+ return false;
+
+ /* only merge integrity protected bio into ditto rq */
+ if (bio_integrity(bio) != blk_integrity_rq(rq))
+ return false;
+
+ return true;
+}
+
+int blk_try_merge(struct request *rq, struct bio *bio)
+{
+ if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
+ return ELEVATOR_BACK_MERGE;
+ else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
+ return ELEVATOR_FRONT_MERGE;
+ return ELEVATOR_NO_MERGE;
+}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index fa1eb0449a05..d3234fc494ad 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -104,9 +104,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
* @lim: the queue_limits structure to reset
*
* Description:
- * Returns a queue_limit struct to its default state. Can be used by
- * stacking drivers like DM that stage table swaps and reuse an
- * existing device queue.
+ * Returns a queue_limit struct to its default state.
*/
void blk_set_default_limits(struct queue_limits *lim)
{
@@ -114,13 +112,12 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_integrity_segments = 0;
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
- lim->max_sectors = BLK_DEF_MAX_SECTORS;
- lim->max_hw_sectors = INT_MAX;
+ lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
lim->discard_misaligned = 0;
- lim->discard_zeroes_data = 1;
+ lim->discard_zeroes_data = 0;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0;
@@ -131,6 +128,27 @@ void blk_set_default_limits(struct queue_limits *lim)
EXPORT_SYMBOL(blk_set_default_limits);
/**
+ * blk_set_stacking_limits - set default limits for stacking devices
+ * @lim: the queue_limits structure to reset
+ *
+ * Description:
+ * Returns a queue_limit struct to its default state. Should be used
+ * by stacking drivers like DM that have no internal limits.
+ */
+void blk_set_stacking_limits(struct queue_limits *lim)
+{
+ blk_set_default_limits(lim);
+
+ /* Inherit limits from component devices */
+ lim->discard_zeroes_data = 1;
+ lim->max_segments = USHRT_MAX;
+ lim->max_hw_sectors = UINT_MAX;
+
+ lim->max_sectors = BLK_DEF_MAX_SECTORS;
+}
+EXPORT_SYMBOL(blk_set_stacking_limits);
+
+/**
* blk_queue_make_request - define an alternate make_request function for a device
* @q: the request queue for the device to be affected
* @mfn: the alternate make_request function
@@ -165,8 +183,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->nr_batching = BLK_BATCH_REQ;
blk_set_default_limits(&q->limits);
- blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
- q->limits.discard_zeroes_data = 0;
/*
* by default assume old behaviour and bounce for any highmem page
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e7f9f657f105..cf150011d808 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -425,7 +425,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+ if (blk_queue_dead(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
@@ -447,7 +447,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+ if (blk_queue_dead(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
@@ -479,8 +479,12 @@ static void blk_release_queue(struct kobject *kobj)
blk_sync_queue(q);
- if (q->elevator)
+ if (q->elevator) {
+ spin_lock_irq(q->queue_lock);
+ ioc_clear_queue(q);
+ spin_unlock_irq(q->queue_lock);
elevator_exit(q->elevator);
+ }
blk_throtl_exit(q);
@@ -494,6 +498,8 @@ static void blk_release_queue(struct kobject *kobj)
blk_trace_shutdown(q);
bdi_destroy(&q->backing_dev_info);
+
+ ida_simple_remove(&blk_queue_ida, q->id);
kmem_cache_free(blk_requestq_cachep, q);
}
diff --git a/block/blk-tag.c b/block/blk-tag.c
index e74d6d13838f..4af6f5cc1167 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -282,18 +282,9 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
void blk_queue_end_tag(struct request_queue *q, struct request *rq)
{
struct blk_queue_tag *bqt = q->queue_tags;
- int tag = rq->tag;
+ unsigned tag = rq->tag; /* negative tags invalid */
- BUG_ON(tag == -1);
-
- if (unlikely(tag >= bqt->max_depth)) {
- /*
- * This can happen after tag depth has been reduced.
- * But tag shouldn't be larger than real_max_depth.
- */
- WARN_ON(tag >= bqt->real_max_depth);
- return;
- }
+ BUG_ON(tag >= bqt->real_max_depth);
list_del_init(&rq->queuelist);
rq->cmd_flags &= ~REQ_QUEUED;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4553245d9317..5eed6a76721d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -310,7 +310,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
struct request_queue *q = td->queue;
/* no throttling for dead queue */
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ if (unlikely(blk_queue_dead(q)))
return NULL;
rcu_read_lock();
@@ -335,7 +335,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
spin_lock_irq(q->queue_lock);
/* Make sure @q is still alive */
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+ if (unlikely(blk_queue_dead(q))) {
kfree(tg);
return NULL;
}
diff --git a/block/blk.h b/block/blk.h
index 3f6551b3c92d..9c12f80882b0 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -1,6 +1,8 @@
#ifndef BLK_INTERNAL_H
#define BLK_INTERNAL_H
+#include <linux/idr.h>
+
/* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME (HZ/50UL)
@@ -9,6 +11,12 @@
extern struct kmem_cache *blk_requestq_cachep;
extern struct kobj_type blk_queue_ktype;
+extern struct ida blk_queue_ida;
+
+static inline void __blk_get_queue(struct request_queue *q)
+{
+ kobject_get(&q->kobj);
+}
void init_request_from_bio(struct request *req, struct bio *bio);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
@@ -85,8 +93,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
q->flush_queue_delayed = 1;
return NULL;
}
- if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
- !q->elevator->ops->elevator_dispatch_fn(q, 0))
+ if (unlikely(blk_queue_dead(q)) ||
+ !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
return NULL;
}
}
@@ -95,16 +103,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_activate_req_fn)
- e->ops->elevator_activate_req_fn(q, rq);
+ if (e->type->ops.elevator_activate_req_fn)
+ e->type->ops.elevator_activate_req_fn(q, rq);
}
static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_deactivate_req_fn)
- e->ops->elevator_deactivate_req_fn(q, rq);
+ if (e->type->ops.elevator_deactivate_req_fn)
+ e->type->ops.elevator_deactivate_req_fn(q, rq);
}
#ifdef CONFIG_FAIL_IO_TIMEOUT
@@ -119,8 +127,6 @@ static inline int blk_should_fake_timeout(struct request_queue *q)
}
#endif
-struct io_context *current_io_context(gfp_t gfp_flags, int node);
-
int ll_back_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio);
int ll_front_merge_fn(struct request_queue *q, struct request *req,
@@ -131,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
void blk_recalc_rq_segments(struct request *rq);
void blk_rq_set_mixed_merge(struct request *rq);
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
+int blk_try_merge(struct request *rq, struct bio *bio);
void blk_queue_congestion_threshold(struct request_queue *q);
@@ -189,6 +197,42 @@ static inline int blk_do_io_stat(struct request *rq)
(rq->cmd_flags & REQ_DISCARD));
}
+/*
+ * Internal io_context interface
+ */
+void get_io_context(struct io_context *ioc);
+struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask);
+void ioc_clear_queue(struct request_queue *q);
+
+void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
+ int node);
+
+/**
+ * create_io_context - try to create task->io_context
+ * @task: target task
+ * @gfp_mask: allocation mask
+ * @node: allocation node
+ *
+ * If @task->io_context is %NULL, allocate a new io_context and install it.
+ * Returns the current @task->io_context which may be %NULL if allocation
+ * failed.
+ *
+ * Note that this function can't be called with IRQ disabled because
+ * task_lock which protects @task->io_context is IRQ-unsafe.
+ */
+static inline struct io_context *create_io_context(struct task_struct *task,
+ gfp_t gfp_mask, int node)
+{
+ WARN_ON_ONCE(irqs_disabled());
+ if (unlikely(!task->io_context))
+ create_io_context_slowpath(task, gfp_mask, node);
+ return task->io_context;
+}
+
+/*
+ * Internal throttling interface
+ */
#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
extern void blk_throtl_drain(struct request_queue *q);
diff --git a/block/bsg.c b/block/bsg.c
index 702f1316bb8f..ff64ae3bacee 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -769,12 +769,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
struct file *file)
{
struct bsg_device *bd;
- int ret;
#ifdef BSG_DEBUG
unsigned char buf[32];
#endif
- ret = blk_get_queue(rq);
- if (ret)
+ if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);
bd = bsg_alloc_device();
@@ -985,7 +983,8 @@ void bsg_unregister_queue(struct request_queue *q)
mutex_lock(&bsg_mutex);
idr_remove(&bsg_minor_idr, bcd->minor);
- sysfs_remove_link(&q->kobj, "bsg");
+ if (q->kobj.sd)
+ sysfs_remove_link(&q->kobj, "bsg");
device_unregister(bcd->class_dev);
bcd->class_dev = NULL;
kref_put(&bcd->ref, bsg_kref_release_function);
@@ -1070,7 +1069,7 @@ EXPORT_SYMBOL_GPL(bsg_register_queue);
static struct cdev bsg_cdev;
-static char *bsg_devnode(struct device *dev, mode_t *mode)
+static char *bsg_devnode(struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 16ace89613bc..d0ba50533668 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -14,6 +14,7 @@
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/blktrace_api.h>
+#include "blk.h"
#include "cfq.h"
/*
@@ -53,20 +54,11 @@ static const int cfq_hist_divisor = 4;
#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
-#define RQ_CIC(rq) \
- ((struct cfq_io_context *) (rq)->elevator_private[0])
-#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private[1])
-#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private[2])
+#define RQ_CIC(rq) icq_to_cic((rq)->elv.icq)
+#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elv.priv[0])
+#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elv.priv[1])
static struct kmem_cache *cfq_pool;
-static struct kmem_cache *cfq_ioc_pool;
-
-static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
-static struct completion *ioc_gone;
-static DEFINE_SPINLOCK(ioc_gone_lock);
-
-static DEFINE_SPINLOCK(cic_index_lock);
-static DEFINE_IDA(cic_index_ida);
#define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -75,6 +67,14 @@ static DEFINE_IDA(cic_index_ida);
#define sample_valid(samples) ((samples) > 80)
#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
+struct cfq_ttime {
+ unsigned long last_end_request;
+
+ unsigned long ttime_total;
+ unsigned long ttime_samples;
+ unsigned long ttime_mean;
+};
+
/*
* Most of our rbtree usage is for sorting with min extraction, so
* if we cache the leftmost node we don't have to walk down the tree
@@ -216,6 +216,12 @@ struct cfq_group {
struct cfq_ttime ttime;
};
+struct cfq_io_cq {
+ struct io_cq icq; /* must be the first member */
+ struct cfq_queue *cfqq[2];
+ struct cfq_ttime ttime;
+};
+
/*
* Per block device queue structure
*/
@@ -267,7 +273,7 @@ struct cfq_data {
struct work_struct unplug_work;
struct cfq_queue *active_queue;
- struct cfq_io_context *active_cic;
+ struct cfq_io_cq *active_cic;
/*
* async queue for each priority case
@@ -290,9 +296,6 @@ struct cfq_data {
unsigned int cfq_group_idle;
unsigned int cfq_latency;
- unsigned int cic_index;
- struct list_head cic_list;
-
/*
* Fallback dummy cfqq for extreme OOM conditions
*/
@@ -464,37 +467,35 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
static void cfq_dispatch_insert(struct request_queue *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
struct io_context *, gfp_t);
-static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
- struct io_context *);
-static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
- bool is_sync)
+static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
{
- return cic->cfqq[is_sync];
+ /* cic->icq is the first member, %NULL will convert to %NULL */
+ return container_of(icq, struct cfq_io_cq, icq);
}
-static inline void cic_set_cfqq(struct cfq_io_context *cic,
- struct cfq_queue *cfqq, bool is_sync)
+static inline struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *cfqd,
+ struct io_context *ioc)
{
- cic->cfqq[is_sync] = cfqq;
+ if (ioc)
+ return icq_to_cic(ioc_lookup_icq(ioc, cfqd->queue));
+ return NULL;
}
-#define CIC_DEAD_KEY 1ul
-#define CIC_DEAD_INDEX_SHIFT 1
-
-static inline void *cfqd_dead_key(struct cfq_data *cfqd)
+static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync)
{
- return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
+ return cic->cfqq[is_sync];
}
-static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq,
+ bool is_sync)
{
- struct cfq_data *cfqd = cic->key;
-
- if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
- return NULL;
+ cic->cfqq[is_sync] = cfqq;
+}
- return cfqd;
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
+{
+ return cic->icq.q->elevator->elevator_data;
}
/*
@@ -1561,7 +1562,7 @@ static struct request *
cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
{
struct task_struct *tsk = current;
- struct cfq_io_context *cic;
+ struct cfq_io_cq *cic;
struct cfq_queue *cfqq;
cic = cfq_cic_lookup(cfqd, tsk->io_context);
@@ -1655,6 +1656,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
struct cfq_queue *cfqq = RQ_CFQQ(rq);
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+
/*
* reposition in fifo if next is older than rq
*/
@@ -1669,13 +1672,23 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
cfq_remove_request(next);
cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(next), rq_is_sync(next));
+
+ cfqq = RQ_CFQQ(next);
+ /*
+ * all requests of this queue are merged to other queues, delete it
+ * from the service tree. If it's the active_queue,
+ * cfq_dispatch_requests() will choose to expire it or do idle
+ */
+ if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
+ cfqq != cfqd->active_queue)
+ cfq_del_cfqq_rr(cfqd, cfqq);
}
static int cfq_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_io_context *cic;
+ struct cfq_io_cq *cic;
struct cfq_queue *cfqq;
/*
@@ -1685,7 +1698,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
return false;
/*
- * Lookup the cfqq that this bio will be queued with. Allow
+ * Lookup the cfqq that this bio will be queued with and allow
* merge only if rq is queued there.
*/
cic = cfq_cic_lookup(cfqd, current->io_context);
@@ -1774,7 +1787,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfqd->active_queue = NULL;
if (cfqd->active_cic) {
- put_io_context(cfqd->active_cic->ioc);
+ put_io_context(cfqd->active_cic->icq.ioc);
cfqd->active_cic = NULL;
}
}
@@ -1994,7 +2007,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
- struct cfq_io_context *cic;
+ struct cfq_io_cq *cic;
unsigned long sl, group_idle = 0;
/*
@@ -2029,7 +2042,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* task has exited, don't wait
*/
cic = cfqd->active_cic;
- if (!cic || !atomic_read(&cic->ioc->nr_tasks))
+ if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks))
return;
/*
@@ -2580,9 +2593,9 @@ static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
cfq_dispatch_insert(cfqd->queue, rq);
if (!cfqd->active_cic) {
- struct cfq_io_context *cic = RQ_CIC(rq);
+ struct cfq_io_cq *cic = RQ_CIC(rq);
- atomic_long_inc(&cic->ioc->refcount);
+ atomic_long_inc(&cic->icq.ioc->refcount);
cfqd->active_cic = cic;
}
@@ -2665,84 +2678,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
cfq_put_cfqg(cfqg);
}
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
- void (*func)(struct io_context *, struct cfq_io_context *))
-{
- struct cfq_io_context *cic;
- struct hlist_node *n;
-
- rcu_read_lock();
-
- hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
- func(ioc, cic);
-
- rcu_read_unlock();
-}
-
-static void cfq_cic_free_rcu(struct rcu_head *head)
-{
- struct cfq_io_context *cic;
-
- cic = container_of(head, struct cfq_io_context, rcu_head);
-
- kmem_cache_free(cfq_ioc_pool, cic);
- elv_ioc_count_dec(cfq_ioc_count);
-
- if (ioc_gone) {
- /*
- * CFQ scheduler is exiting, grab exit lock and check
- * the pending io context count. If it hits zero,
- * complete ioc_gone and set it back to NULL
- */
- spin_lock(&ioc_gone_lock);
- if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
- complete(ioc_gone);
- ioc_gone = NULL;
- }
- spin_unlock(&ioc_gone_lock);
- }
-}
-
-static void cfq_cic_free(struct cfq_io_context *cic)
-{
- call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
-}
-
-static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
-{
- unsigned long flags;
- unsigned long dead_key = (unsigned long) cic->key;
-
- BUG_ON(!(dead_key & CIC_DEAD_KEY));
-
- spin_lock_irqsave(&ioc->lock, flags);
- radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
- hlist_del_rcu(&cic->cic_list);
- spin_unlock_irqrestore(&ioc->lock, flags);
-
- cfq_cic_free(cic);
-}
-
-/*
- * Must be called with rcu_read_lock() held or preemption otherwise disabled.
- * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
- * and ->trim() which is called with the task lock held
- */
-static void cfq_free_io_context(struct io_context *ioc)
-{
- /*
- * ioc->refcount is zero here, or we are called from elv_unregister(),
- * so no more cic's are allowed to be linked into this ioc. So it
- * should be ok to iterate over the known list, we will see all cic's
- * since no new ones are added.
- */
- call_for_each_cic(ioc, cic_free_func);
-}
-
static void cfq_put_cooperator(struct cfq_queue *cfqq)
{
struct cfq_queue *__cfqq, *next;
@@ -2776,27 +2711,17 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
cfq_put_queue(cfqq);
}
-static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
- struct cfq_io_context *cic)
+static void cfq_init_icq(struct io_cq *icq)
{
- struct io_context *ioc = cic->ioc;
-
- list_del_init(&cic->queue_list);
+ struct cfq_io_cq *cic = icq_to_cic(icq);
- /*
- * Make sure dead mark is seen for dead queues
- */
- smp_wmb();
- cic->key = cfqd_dead_key(cfqd);
+ cic->ttime.last_end_request = jiffies;
+}
- rcu_read_lock();
- if (rcu_dereference(ioc->ioc_data) == cic) {
- rcu_read_unlock();
- spin_lock(&ioc->lock);
- rcu_assign_pointer(ioc->ioc_data, NULL);
- spin_unlock(&ioc->lock);
- } else
- rcu_read_unlock();
+static void cfq_exit_icq(struct io_cq *icq)
+{
+ struct cfq_io_cq *cic = icq_to_cic(icq);
+ struct cfq_data *cfqd = cic_to_cfqd(cic);
if (cic->cfqq[BLK_RW_ASYNC]) {
cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2809,57 +2734,6 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
}
}
-static void cfq_exit_single_io_context(struct io_context *ioc,
- struct cfq_io_context *cic)
-{
- struct cfq_data *cfqd = cic_to_cfqd(cic);
-
- if (cfqd) {
- struct request_queue *q = cfqd->queue;
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- /*
- * Ensure we get a fresh copy of the ->key to prevent
- * race between exiting task and queue
- */
- smp_read_barrier_depends();
- if (cic->key == cfqd)
- __cfq_exit_single_io_context(cfqd, cic);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
-}
-
-/*
- * The process that ioc belongs to has exited, we need to clean up
- * and put the internal structures we have that belongs to that process.
- */
-static void cfq_exit_io_context(struct io_context *ioc)
-{
- call_for_each_cic(ioc, cfq_exit_single_io_context);
-}
-
-static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
- struct cfq_io_context *cic;
-
- cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
- cfqd->queue->node);
- if (cic) {
- cic->ttime.last_end_request = jiffies;
- INIT_LIST_HEAD(&cic->queue_list);
- INIT_HLIST_NODE(&cic->cic_list);
- cic->dtor = cfq_free_io_context;
- cic->exit = cfq_exit_io_context;
- elv_ioc_count_inc(cfq_ioc_count);
- }
-
- return cic;
-}
-
static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
{
struct task_struct *tsk = current;
@@ -2902,21 +2776,18 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
cfq_clear_cfqq_prio_changed(cfqq);
}
-static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
+static void changed_ioprio(struct cfq_io_cq *cic)
{
struct cfq_data *cfqd = cic_to_cfqd(cic);
struct cfq_queue *cfqq;
- unsigned long flags;
if (unlikely(!cfqd))
return;
- spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-
cfqq = cic->cfqq[BLK_RW_ASYNC];
if (cfqq) {
struct cfq_queue *new_cfqq;
- new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
+ new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc,
GFP_ATOMIC);
if (new_cfqq) {
cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
@@ -2927,14 +2798,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
cfqq = cic->cfqq[BLK_RW_SYNC];
if (cfqq)
cfq_mark_cfqq_prio_changed(cfqq);
-
- spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
-}
-
-static void cfq_ioc_set_ioprio(struct io_context *ioc)
-{
- call_for_each_cic(ioc, changed_ioprio);
- ioc->ioprio_changed = 0;
}
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
@@ -2958,11 +2821,10 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
+static void changed_cgroup(struct cfq_io_cq *cic)
{
struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
struct cfq_data *cfqd = cic_to_cfqd(cic);
- unsigned long flags;
struct request_queue *q;
if (unlikely(!cfqd))
@@ -2970,8 +2832,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
q = cfqd->queue;
- spin_lock_irqsave(q->queue_lock, flags);
-
if (sync_cfqq) {
/*
* Drop reference to sync queue. A new sync queue will be
@@ -2981,14 +2841,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
cic_set_cfqq(cic, NULL, 1);
cfq_put_queue(sync_cfqq);
}
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void cfq_ioc_set_cgroup(struct io_context *ioc)
-{
- call_for_each_cic(ioc, changed_cgroup);
- ioc->cgroup_changed = 0;
}
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
@@ -2997,7 +2849,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
struct io_context *ioc, gfp_t gfp_mask)
{
struct cfq_queue *cfqq, *new_cfqq = NULL;
- struct cfq_io_context *cic;
+ struct cfq_io_cq *cic;
struct cfq_group *cfqg;
retry:
@@ -3088,153 +2940,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
return cfqq;
}
-/*
- * We drop cfq io contexts lazily, so we may find a dead one.
- */
-static void
-cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
- struct cfq_io_context *cic)
-{
- unsigned long flags;
-
- WARN_ON(!list_empty(&cic->queue_list));
- BUG_ON(cic->key != cfqd_dead_key(cfqd));
-
- spin_lock_irqsave(&ioc->lock, flags);
-
- BUG_ON(rcu_dereference_check(ioc->ioc_data,
- lockdep_is_held(&ioc->lock)) == cic);
-
- radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
- hlist_del_rcu(&cic->cic_list);
- spin_unlock_irqrestore(&ioc->lock, flags);
-
- cfq_cic_free(cic);
-}
-
-static struct cfq_io_context *
-cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
-{
- struct cfq_io_context *cic;
- unsigned long flags;
-
- if (unlikely(!ioc))
- return NULL;
-
- rcu_read_lock();
-
- /*
- * we maintain a last-hit cache, to avoid browsing over the tree
- */
- cic = rcu_dereference(ioc->ioc_data);
- if (cic && cic->key == cfqd) {
- rcu_read_unlock();
- return cic;
- }
-
- do {
- cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
- rcu_read_unlock();
- if (!cic)
- break;
- if (unlikely(cic->key != cfqd)) {
- cfq_drop_dead_cic(cfqd, ioc, cic);
- rcu_read_lock();
- continue;
- }
-
- spin_lock_irqsave(&ioc->lock, flags);
- rcu_assign_pointer(ioc->ioc_data, cic);
- spin_unlock_irqrestore(&ioc->lock, flags);
- break;
- } while (1);
-
- return cic;
-}
-
-/*
- * Add cic into ioc, using cfqd as the search key. This enables us to lookup
- * the process specific cfq io context when entered from the block layer.
- * Also adds the cic to a per-cfqd list, used when this queue is removed.
- */
-static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
- struct cfq_io_context *cic, gfp_t gfp_mask)
-{
- unsigned long flags;
- int ret;
-
- ret = radix_tree_preload(gfp_mask);
- if (!ret) {
- cic->ioc = ioc;
- cic->key = cfqd;
-
- spin_lock_irqsave(&ioc->lock, flags);
- ret = radix_tree_insert(&ioc->radix_root,
- cfqd->cic_index, cic);
- if (!ret)
- hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
- spin_unlock_irqrestore(&ioc->lock, flags);
-
- radix_tree_preload_end();
-
- if (!ret) {
- spin_lock_irqsave(cfqd->queue->queue_lock, flags);
- list_add(&cic->queue_list, &cfqd->cic_list);
- spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
- }
- }
-
- if (ret)
- printk(KERN_ERR "cfq: cic link failed!\n");
-
- return ret;
-}
-
-/*
- * Setup general io context and cfq io context. There can be several cfq
- * io contexts per general io context, if this process is doing io to more
- * than one device managed by cfq.
- */
-static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
- struct io_context *ioc = NULL;
- struct cfq_io_context *cic;
-
- might_sleep_if(gfp_mask & __GFP_WAIT);
-
- ioc = get_io_context(gfp_mask, cfqd->queue->node);
- if (!ioc)
- return NULL;
-
- cic = cfq_cic_lookup(cfqd, ioc);
- if (cic)
- goto out;
-
- cic = cfq_alloc_io_context(cfqd, gfp_mask);
- if (cic == NULL)
- goto err;
-
- if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
- goto err_free;
-
-out:
- smp_read_barrier_depends();
- if (unlikely(ioc->ioprio_changed))
- cfq_ioc_set_ioprio(ioc);
-
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
- if (unlikely(ioc->cgroup_changed))
- cfq_ioc_set_cgroup(ioc);
-#endif
- return cic;
-err_free:
- cfq_cic_free(cic);
-err:
- put_io_context(ioc);
- return NULL;
-}
-
static void
__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
{
@@ -3248,7 +2953,7 @@ __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
static void
cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_io_context *cic)
+ struct cfq_io_cq *cic)
{
if (cfq_cfqq_sync(cfqq)) {
__cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
@@ -3286,7 +2991,7 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
*/
static void
cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct cfq_io_context *cic)
+ struct cfq_io_cq *cic)
{
int old_idle, enable_idle;
@@ -3303,8 +3008,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
enable_idle = 0;
- else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
- (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
+ else if (!atomic_read(&cic->icq.ioc->nr_tasks) ||
+ !cfqd->cfq_slice_idle ||
+ (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
enable_idle = 0;
else if (sample_valid(cic->ttime.ttime_samples)) {
if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
@@ -3404,7 +3110,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
*/
static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- struct cfq_queue *old_cfqq = cfqd->active_queue;
+ enum wl_type_t old_type = cfqq_type(cfqd->active_queue);
cfq_log_cfqq(cfqd, cfqq, "preempt");
cfq_slice_expired(cfqd, 1);
@@ -3413,7 +3119,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* workload type is changed, don't save slice, otherwise preempt
* doesn't happen
*/
- if (cfqq_type(old_cfqq) != cfqq_type(cfqq))
+ if (old_type != cfqq_type(cfqq))
cfqq->cfqg->saved_workload_slice = 0;
/*
@@ -3436,7 +3142,7 @@ static void
cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct request *rq)
{
- struct cfq_io_context *cic = RQ_CIC(rq);
+ struct cfq_io_cq *cic = RQ_CIC(rq);
cfqd->rq_queued++;
if (rq->cmd_flags & REQ_PRIO)
@@ -3489,7 +3195,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
struct cfq_queue *cfqq = RQ_CFQQ(rq);
cfq_log_cfqq(cfqd, cfqq, "insert_request");
- cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
+ cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc);
rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
list_add_tail(&rq->queuelist, &cfqq->fifo);
@@ -3539,7 +3245,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- struct cfq_io_context *cic = cfqd->active_cic;
+ struct cfq_io_cq *cic = cfqd->active_cic;
/* If the queue already has requests, don't wait */
if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -3676,7 +3382,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct task_struct *tsk = current;
- struct cfq_io_context *cic;
+ struct cfq_io_cq *cic;
struct cfq_queue *cfqq;
/*
@@ -3691,7 +3397,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
if (cfqq) {
- cfq_init_prio_data(cfqq, cic->ioc);
+ cfq_init_prio_data(cfqq, cic->icq.ioc);
return __cfq_may_queue(cfqq);
}
@@ -3712,21 +3418,17 @@ static void cfq_put_request(struct request *rq)
BUG_ON(!cfqq->allocated[rw]);
cfqq->allocated[rw]--;
- put_io_context(RQ_CIC(rq)->ioc);
-
- rq->elevator_private[0] = NULL;
- rq->elevator_private[1] = NULL;
-
/* Put down rq reference on cfqg */
cfq_put_cfqg(RQ_CFQG(rq));
- rq->elevator_private[2] = NULL;
+ rq->elv.priv[0] = NULL;
+ rq->elv.priv[1] = NULL;
cfq_put_queue(cfqq);
}
}
static struct cfq_queue *
-cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
+cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic,
struct cfq_queue *cfqq)
{
cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
@@ -3741,7 +3443,7 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
* was the last process referring to said cfqq.
*/
static struct cfq_queue *
-split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
+split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
{
if (cfqq_process_refs(cfqq) == 1) {
cfqq->pid = current->pid;
@@ -3764,25 +3466,29 @@ static int
cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_io_context *cic;
+ struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
const int rw = rq_data_dir(rq);
const bool is_sync = rq_is_sync(rq);
struct cfq_queue *cfqq;
- unsigned long flags;
might_sleep_if(gfp_mask & __GFP_WAIT);
- cic = cfq_get_io_context(cfqd, gfp_mask);
-
- spin_lock_irqsave(q->queue_lock, flags);
+ spin_lock_irq(q->queue_lock);
- if (!cic)
- goto queue_fail;
+ /* handle changed notifications */
+ if (unlikely(cic->icq.changed)) {
+ if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
+ changed_ioprio(cic);
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
+ changed_cgroup(cic);
+#endif
+ }
new_queue:
cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq || cfqq == &cfqd->oom_cfqq) {
- cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
+ cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask);
cic_set_cfqq(cic, cfqq, is_sync);
} else {
/*
@@ -3808,17 +3514,10 @@ new_queue:
cfqq->allocated[rw]++;
cfqq->ref++;
- rq->elevator_private[0] = cic;
- rq->elevator_private[1] = cfqq;
- rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ rq->elv.priv[0] = cfqq;
+ rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
+ spin_unlock_irq(q->queue_lock);
return 0;
-
-queue_fail:
- cfq_schedule_dispatch(cfqd);
- spin_unlock_irqrestore(q->queue_lock, flags);
- cfq_log(cfqd, "set_request fail");
- return 1;
}
static void cfq_kick_queue(struct work_struct *work)
@@ -3922,14 +3621,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
if (cfqd->active_queue)
__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
- while (!list_empty(&cfqd->cic_list)) {
- struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
- struct cfq_io_context,
- queue_list);
-
- __cfq_exit_single_io_context(cfqd, cic);
- }
-
cfq_put_async_queues(cfqd);
cfq_release_cfq_groups(cfqd);
@@ -3944,10 +3635,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
cfq_shutdown_timer_wq(cfqd);
- spin_lock(&cic_index_lock);
- ida_remove(&cic_index_ida, cfqd->cic_index);
- spin_unlock(&cic_index_lock);
-
/*
* Wait for cfqg->blkg->key accessors to exit their grace periods.
* Do this wait only if there are other unlinked groups out
@@ -3969,24 +3656,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
kfree(cfqd);
}
-static int cfq_alloc_cic_index(void)
-{
- int index, error;
-
- do {
- if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
- return -ENOMEM;
-
- spin_lock(&cic_index_lock);
- error = ida_get_new(&cic_index_ida, &index);
- spin_unlock(&cic_index_lock);
- if (error && error != -EAGAIN)
- return error;
- } while (error);
-
- return index;
-}
-
static void *cfq_init_queue(struct request_queue *q)
{
struct cfq_data *cfqd;
@@ -3994,23 +3663,9 @@ static void *cfq_init_queue(struct request_queue *q)
struct cfq_group *cfqg;
struct cfq_rb_root *st;
- i = cfq_alloc_cic_index();
- if (i < 0)
- return NULL;
-
cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
- if (!cfqd) {
- spin_lock(&cic_index_lock);
- ida_remove(&cic_index_ida, i);
- spin_unlock(&cic_index_lock);
+ if (!cfqd)
return NULL;
- }
-
- /*
- * Don't need take queue_lock in the routine, since we are
- * initializing the ioscheduler, and nobody is using cfqd
- */
- cfqd->cic_index = i;
/* Init root service tree */
cfqd->grp_service_tree = CFQ_RB_ROOT;
@@ -4067,8 +3722,6 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->oom_cfqq.ref++;
cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
- INIT_LIST_HEAD(&cfqd->cic_list);
-
cfqd->queue = q;
init_timer(&cfqd->idle_slice_timer);
@@ -4097,34 +3750,6 @@ static void *cfq_init_queue(struct request_queue *q)
return cfqd;
}
-static void cfq_slab_kill(void)
-{
- /*
- * Caller already ensured that pending RCU callbacks are completed,
- * so we should have no busy allocations at this point.
- */
- if (cfq_pool)
- kmem_cache_destroy(cfq_pool);
- if (cfq_ioc_pool)
- kmem_cache_destroy(cfq_ioc_pool);
-}
-
-static int __init cfq_slab_setup(void)
-{
- cfq_pool = KMEM_CACHE(cfq_queue, 0);
- if (!cfq_pool)
- goto fail;
-
- cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
- if (!cfq_ioc_pool)
- goto fail;
-
- return 0;
-fail:
- cfq_slab_kill();
- return -ENOMEM;
-}
-
/*
* sysfs parts below -->
*/
@@ -4230,15 +3855,18 @@ static struct elevator_type iosched_cfq = {
.elevator_completed_req_fn = cfq_completed_request,
.elevator_former_req_fn = elv_rb_former_request,
.elevator_latter_req_fn = elv_rb_latter_request,
+ .elevator_init_icq_fn = cfq_init_icq,
+ .elevator_exit_icq_fn = cfq_exit_icq,
.elevator_set_req_fn = cfq_set_request,
.elevator_put_req_fn = cfq_put_request,
.elevator_may_queue_fn = cfq_may_queue,
.elevator_init_fn = cfq_init_queue,
.elevator_exit_fn = cfq_exit_queue,
- .trim = cfq_free_io_context,
},
+ .icq_size = sizeof(struct cfq_io_cq),
+ .icq_align = __alignof__(struct cfq_io_cq),
.elevator_attrs = cfq_attrs,
- .elevator_name = "cfq",
+ .elevator_name = "cfq",
.elevator_owner = THIS_MODULE,
};
@@ -4256,6 +3884,8 @@ static struct blkio_policy_type blkio_policy_cfq;
static int __init cfq_init(void)
{
+ int ret;
+
/*
* could be 0 on HZ < 1000 setups
*/
@@ -4270,10 +3900,16 @@ static int __init cfq_init(void)
#else
cfq_group_idle = 0;
#endif
- if (cfq_slab_setup())
+ cfq_pool = KMEM_CACHE(cfq_queue, 0);
+ if (!cfq_pool)
return -ENOMEM;
- elv_register(&iosched_cfq);
+ ret = elv_register(&iosched_cfq);
+ if (ret) {
+ kmem_cache_destroy(cfq_pool);
+ return ret;
+ }
+
blkio_policy_register(&blkio_policy_cfq);
return 0;
@@ -4281,21 +3917,9 @@ static int __init cfq_init(void)
static void __exit cfq_exit(void)
{
- DECLARE_COMPLETION_ONSTACK(all_gone);
blkio_policy_unregister(&blkio_policy_cfq);
elv_unregister(&iosched_cfq);
- ioc_gone = &all_gone;
- /* ioc_gone's update must be visible before reading ioc_count */
- smp_wmb();
-
- /*
- * this also protects us from entering cfq_slab_kill() with
- * pending RCU callbacks
- */
- if (elv_ioc_count_read(cfq_ioc_count))
- wait_for_completion(&all_gone);
- ida_destroy(&cic_index_ida);
- cfq_slab_kill();
+ kmem_cache_destroy(cfq_pool);
}
module_init(cfq_init);
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7b725020823c..7c668c8a6f95 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -719,6 +719,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case BLKSECTGET:
return compat_put_ushort(arg,
queue_max_sectors(bdev_get_queue(bdev)));
+ case BLKROTATIONAL:
+ return compat_put_ushort(arg,
+ !blk_queue_nonrot(bdev_get_queue(bdev)));
case BLKRASET: /* compatible, but no compat_ptr (!) */
case BLKFRASET:
if (!capable(CAP_SYS_ADMIN))
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c644137d9cd6..7bf12d793fcd 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -448,9 +448,7 @@ static struct elevator_type iosched_deadline = {
static int __init deadline_init(void)
{
- elv_register(&iosched_deadline);
-
- return 0;
+ return elv_register(&iosched_deadline);
}
static void __exit deadline_exit(void)
diff --git a/block/elevator.c b/block/elevator.c
index 66343d6917d0..f016855a46b0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -61,8 +61,8 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_allow_merge_fn)
- return e->ops->elevator_allow_merge_fn(q, rq, bio);
+ if (e->type->ops.elevator_allow_merge_fn)
+ return e->type->ops.elevator_allow_merge_fn(q, rq, bio);
return 1;
}
@@ -70,39 +70,9 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
/*
* can we safely merge with this request?
*/
-int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
- if (!rq_mergeable(rq))
- return 0;
-
- /*
- * Don't merge file system requests and discard requests
- */
- if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
- return 0;
-
- /*
- * Don't merge discard requests and secure discard requests
- */
- if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
- return 0;
-
- /*
- * different data direction or already started, don't merge
- */
- if (bio_data_dir(bio) != rq_data_dir(rq))
- return 0;
-
- /*
- * must be same device and not a special request
- */
- if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
- return 0;
-
- /*
- * only merge integrity protected bio into ditto rq
- */
- if (bio_integrity(bio) != blk_integrity_rq(rq))
+ if (!blk_rq_merge_ok(rq, bio))
return 0;
if (!elv_iosched_allow_merge(rq, bio))
@@ -112,23 +82,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_rq_merge_ok);
-int elv_try_merge(struct request *__rq, struct bio *bio)
-{
- int ret = ELEVATOR_NO_MERGE;
-
- /*
- * we can merge and sequence is ok, check if it's possible
- */
- if (elv_rq_merge_ok(__rq, bio)) {
- if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
- ret = ELEVATOR_BACK_MERGE;
- else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
- ret = ELEVATOR_FRONT_MERGE;
- }
-
- return ret;
-}
-
static struct elevator_type *elevator_find(const char *name)
{
struct elevator_type *e;
@@ -168,17 +121,13 @@ static struct elevator_type *elevator_get(const char *name)
return e;
}
-static void *elevator_init_queue(struct request_queue *q,
- struct elevator_queue *eq)
-{
- return eq->ops->elevator_init_fn(q);
-}
-
-static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
- void *data)
+static int elevator_init_queue(struct request_queue *q,
+ struct elevator_queue *eq)
{
- q->elevator = eq;
- eq->elevator_data = data;
+ eq->elevator_data = eq->type->ops.elevator_init_fn(q);
+ if (eq->elevator_data)
+ return 0;
+ return -ENOMEM;
}
static char chosen_elevator[ELV_NAME_MAX];
@@ -207,8 +156,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
if (unlikely(!eq))
goto err;
- eq->ops = &e->ops;
- eq->elevator_type = e;
+ eq->type = e;
kobject_init(&eq->kobj, &elv_ktype);
mutex_init(&eq->sysfs_lock);
@@ -232,7 +180,7 @@ static void elevator_release(struct kobject *kobj)
struct elevator_queue *e;
e = container_of(kobj, struct elevator_queue, kobj);
- elevator_put(e->elevator_type);
+ elevator_put(e->type);
kfree(e->hash);
kfree(e);
}
@@ -241,7 +189,7 @@ int elevator_init(struct request_queue *q, char *name)
{
struct elevator_type *e = NULL;
struct elevator_queue *eq;
- void *data;
+ int err;
if (unlikely(q->elevator))
return 0;
@@ -278,13 +226,13 @@ int elevator_init(struct request_queue *q, char *name)
if (!eq)
return -ENOMEM;
- data = elevator_init_queue(q, eq);
- if (!data) {
+ err = elevator_init_queue(q, eq);
+ if (err) {
kobject_put(&eq->kobj);
- return -ENOMEM;
+ return err;
}
- elevator_attach(q, eq, data);
+ q->elevator = eq;
return 0;
}
EXPORT_SYMBOL(elevator_init);
@@ -292,9 +240,8 @@ EXPORT_SYMBOL(elevator_init);
void elevator_exit(struct elevator_queue *e)
{
mutex_lock(&e->sysfs_lock);
- if (e->ops->elevator_exit_fn)
- e->ops->elevator_exit_fn(e);
- e->ops = NULL;
+ if (e->type->ops.elevator_exit_fn)
+ e->type->ops.elevator_exit_fn(e);
mutex_unlock(&e->sysfs_lock);
kobject_put(&e->kobj);
@@ -484,8 +431,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
/*
* First try one-hit cache.
*/
- if (q->last_merge) {
- ret = elv_try_merge(q->last_merge, bio);
+ if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+ ret = blk_try_merge(q->last_merge, bio);
if (ret != ELEVATOR_NO_MERGE) {
*req = q->last_merge;
return ret;
@@ -504,8 +451,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
return ELEVATOR_BACK_MERGE;
}
- if (e->ops->elevator_merge_fn)
- return e->ops->elevator_merge_fn(q, req, bio);
+ if (e->type->ops.elevator_merge_fn)
+ return e->type->ops.elevator_merge_fn(q, req, bio);
return ELEVATOR_NO_MERGE;
}
@@ -548,8 +495,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_merged_fn)
- e->ops->elevator_merged_fn(q, rq, type);
+ if (e->type->ops.elevator_merged_fn)
+ e->type->ops.elevator_merged_fn(q, rq, type);
if (type == ELEVATOR_BACK_MERGE)
elv_rqhash_reposition(q, rq);
@@ -563,8 +510,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
struct elevator_queue *e = q->elevator;
const int next_sorted = next->cmd_flags & REQ_SORTED;
- if (next_sorted && e->ops->elevator_merge_req_fn)
- e->ops->elevator_merge_req_fn(q, rq, next);
+ if (next_sorted && e->type->ops.elevator_merge_req_fn)
+ e->type->ops.elevator_merge_req_fn(q, rq, next);
elv_rqhash_reposition(q, rq);
@@ -581,8 +528,8 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_bio_merged_fn)
- e->ops->elevator_bio_merged_fn(q, rq, bio);
+ if (e->type->ops.elevator_bio_merged_fn)
+ e->type->ops.elevator_bio_merged_fn(q, rq, bio);
}
void elv_requeue_request(struct request_queue *q, struct request *rq)
@@ -608,12 +555,12 @@ void elv_drain_elevator(struct request_queue *q)
lockdep_assert_held(q->queue_lock);
- while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+ while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
;
if (q->nr_sorted && printed++ < 10) {
printk(KERN_ERR "%s: forced dispatching is broken "
"(nr_sorted=%u), please report this\n",
- q->elevator->elevator_type->elevator_name, q->nr_sorted);
+ q->elevator->type->elevator_name, q->nr_sorted);
}
}
@@ -702,7 +649,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
* rq cannot be accessed after calling
* elevator_add_req_fn.
*/
- q->elevator->ops->elevator_add_req_fn(q, rq);
+ q->elevator->type->ops.elevator_add_req_fn(q, rq);
break;
case ELEVATOR_INSERT_FLUSH:
@@ -731,8 +678,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_latter_req_fn)
- return e->ops->elevator_latter_req_fn(q, rq);
+ if (e->type->ops.elevator_latter_req_fn)
+ return e->type->ops.elevator_latter_req_fn(q, rq);
return NULL;
}
@@ -740,8 +687,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_former_req_fn)
- return e->ops->elevator_former_req_fn(q, rq);
+ if (e->type->ops.elevator_former_req_fn)
+ return e->type->ops.elevator_former_req_fn(q, rq);
return NULL;
}
@@ -749,10 +696,8 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_set_req_fn)
- return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
-
- rq->elevator_private[0] = NULL;
+ if (e->type->ops.elevator_set_req_fn)
+ return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
return 0;
}
@@ -760,16 +705,16 @@ void elv_put_request(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_put_req_fn)
- e->ops->elevator_put_req_fn(rq);
+ if (e->type->ops.elevator_put_req_fn)
+ e->type->ops.elevator_put_req_fn(rq);
}
int elv_may_queue(struct request_queue *q, int rw)
{
struct elevator_queue *e = q->elevator;
- if (e->ops->elevator_may_queue_fn)
- return e->ops->elevator_may_queue_fn(q, rw);
+ if (e->type->ops.elevator_may_queue_fn)
+ return e->type->ops.elevator_may_queue_fn(q, rw);
return ELV_MQUEUE_MAY;
}
@@ -804,8 +749,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
if (blk_account_rq(rq)) {
q->in_flight[rq_is_sync(rq)]--;
if ((rq->cmd_flags & REQ_SORTED) &&
- e->ops->elevator_completed_req_fn)
- e->ops->elevator_completed_req_fn(q, rq);
+ e->type->ops.elevator_completed_req_fn)
+ e->type->ops.elevator_completed_req_fn(q, rq);
}
}
@@ -823,7 +768,7 @@ elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->ops ? entry->show(e, page) : -ENOENT;
+ error = e->type ? entry->show(e, page) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
return error;
}
@@ -841,7 +786,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
e = container_of(kobj, struct elevator_queue, kobj);
mutex_lock(&e->sysfs_lock);
- error = e->ops ? entry->store(e, page, length) : -ENOENT;
+ error = e->type ? entry->store(e, page, length) : -ENOENT;
mutex_unlock(&e->sysfs_lock);
return error;
}
@@ -856,14 +801,13 @@ static struct kobj_type elv_ktype = {
.release = elevator_release,
};
-int elv_register_queue(struct request_queue *q)
+int __elv_register_queue(struct request_queue *q, struct elevator_queue *e)
{
- struct elevator_queue *e = q->elevator;
int error;
error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
if (!error) {
- struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
+ struct elv_fs_entry *attr = e->type->elevator_attrs;
if (attr) {
while (attr->attr.name) {
if (sysfs_create_file(&e->kobj, &attr->attr))
@@ -876,31 +820,55 @@ int elv_register_queue(struct request_queue *q)
}
return error;
}
-EXPORT_SYMBOL(elv_register_queue);
-static void __elv_unregister_queue(struct elevator_queue *e)
+int elv_register_queue(struct request_queue *q)
{
- kobject_uevent(&e->kobj, KOBJ_REMOVE);
- kobject_del(&e->kobj);
- e->registered = 0;
+ return __elv_register_queue(q, q->elevator);
}
+EXPORT_SYMBOL(elv_register_queue);
void elv_unregister_queue(struct request_queue *q)
{
- if (q)
- __elv_unregister_queue(q->elevator);
+ if (q) {
+ struct elevator_queue *e = q->elevator;
+
+ kobject_uevent(&e->kobj, KOBJ_REMOVE);
+ kobject_del(&e->kobj);
+ e->registered = 0;
+ }
}
EXPORT_SYMBOL(elv_unregister_queue);
-void elv_register(struct elevator_type *e)
+int elv_register(struct elevator_type *e)
{
char *def = "";
+ /* create icq_cache if requested */
+ if (e->icq_size) {
+ if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
+ WARN_ON(e->icq_align < __alignof__(struct io_cq)))
+ return -EINVAL;
+
+ snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
+ "%s_io_cq", e->elevator_name);
+ e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
+ e->icq_align, 0, NULL);
+ if (!e->icq_cache)
+ return -ENOMEM;
+ }
+
+ /* register, don't allow duplicate names */
spin_lock(&elv_list_lock);
- BUG_ON(elevator_find(e->elevator_name));
+ if (elevator_find(e->elevator_name)) {
+ spin_unlock(&elv_list_lock);
+ if (e->icq_cache)
+ kmem_cache_destroy(e->icq_cache);
+ return -EBUSY;
+ }
list_add_tail(&e->list, &elv_list);
spin_unlock(&elv_list_lock);
+ /* print pretty message */
if (!strcmp(e->elevator_name, chosen_elevator) ||
(!*chosen_elevator &&
!strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
@@ -908,30 +876,26 @@ void elv_register(struct elevator_type *e)
printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
def);
+ return 0;
}
EXPORT_SYMBOL_GPL(elv_register);
void elv_unregister(struct elevator_type *e)
{
- struct task_struct *g, *p;
+ /* unregister */
+ spin_lock(&elv_list_lock);
+ list_del_init(&e->list);
+ spin_unlock(&elv_list_lock);
/*
- * Iterate every thread in the process to remove the io contexts.
+ * Destroy icq_cache if it exists. icq's are RCU managed. Make
+ * sure all RCU operations are complete before proceeding.
*/
- if (e->ops.trim) {
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- task_lock(p);
- if (p->io_context)
- e->ops.trim(p->io_context);
- task_unlock(p);
- } while_each_thread(g, p);
- read_unlock(&tasklist_lock);
+ if (e->icq_cache) {
+ rcu_barrier();
+ kmem_cache_destroy(e->icq_cache);
+ e->icq_cache = NULL;
}
-
- spin_lock(&elv_list_lock);
- list_del_init(&e->list);
- spin_unlock(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);
@@ -944,54 +908,41 @@ EXPORT_SYMBOL_GPL(elv_unregister);
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
struct elevator_queue *old_elevator, *e;
- void *data;
int err;
- /*
- * Allocate new elevator
- */
+ /* allocate new elevator */
e = elevator_alloc(q, new_e);
if (!e)
return -ENOMEM;
- data = elevator_init_queue(q, e);
- if (!data) {
+ err = elevator_init_queue(q, e);
+ if (err) {
kobject_put(&e->kobj);
- return -ENOMEM;
+ return err;
}
- /*
- * Turn on BYPASS and drain all requests w/ elevator private data
- */
+ /* turn on BYPASS and drain all requests w/ elevator private data */
elv_quiesce_start(q);
- /*
- * Remember old elevator.
- */
- old_elevator = q->elevator;
-
- /*
- * attach and start new elevator
- */
- spin_lock_irq(q->queue_lock);
- elevator_attach(q, e, data);
- spin_unlock_irq(q->queue_lock);
-
- if (old_elevator->registered) {
- __elv_unregister_queue(old_elevator);
-
- err = elv_register_queue(q);
+ /* unregister old queue, register new one and kill old elevator */
+ if (q->elevator->registered) {
+ elv_unregister_queue(q);
+ err = __elv_register_queue(q, e);
if (err)
goto fail_register;
}
- /*
- * finally exit old elevator and turn off BYPASS.
- */
+ /* done, clear io_cq's, switch elevators and turn off BYPASS */
+ spin_lock_irq(q->queue_lock);
+ ioc_clear_queue(q);
+ old_elevator = q->elevator;
+ q->elevator = e;
+ spin_unlock_irq(q->queue_lock);
+
elevator_exit(old_elevator);
elv_quiesce_end(q);
- blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+ blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name);
return 0;
@@ -1001,7 +952,6 @@ fail_register:
* one again (along with re-adding the sysfs dir)
*/
elevator_exit(e);
- q->elevator = old_elevator;
elv_register_queue(q);
elv_quiesce_end(q);
@@ -1026,7 +976,7 @@ int elevator_change(struct request_queue *q, const char *name)
return -EINVAL;
}
- if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
+ if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
elevator_put(e);
return 0;
}
@@ -1061,7 +1011,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
if (!q->elevator || !blk_queue_stackable(q))
return sprintf(name, "none\n");
- elv = e->elevator_type;
+ elv = e->type;
spin_lock(&elv_list_lock);
list_for_each_entry(__e, &elv_list, list) {
diff --git a/block/genhd.c b/block/genhd.c
index 02e9fca80825..23b4f7063322 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/kobj_map.h>
-#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
@@ -507,7 +506,7 @@ static int exact_lock(dev_t devt, void *data)
return 0;
}
-void register_disk(struct gendisk *disk)
+static void register_disk(struct gendisk *disk)
{
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
@@ -615,7 +614,7 @@ void add_disk(struct gendisk *disk)
* Take an extra ref on queue which will be put on disk_release()
* so that it sticks around as long as @disk is there.
*/
- WARN_ON_ONCE(blk_get_queue(disk->queue));
+ WARN_ON_ONCE(!blk_get_queue(disk->queue));
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
@@ -1109,7 +1108,7 @@ struct class block_class = {
.name = "block",
};
-static char *block_devnode(struct device *dev, mode_t *mode)
+static char *block_devnode(struct device *dev, umode_t *mode)
{
struct gendisk *disk = dev_to_disk(dev);
diff --git a/block/ioctl.c b/block/ioctl.c
index ca939fc1030f..ba15b2dbfb98 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -5,7 +5,7 @@
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
+#include <linux/fs.h>
#include <linux/blktrace_api.h>
#include <asm/uaccess.h>
@@ -180,6 +180,26 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
/*
+ * Is it an unrecognized ioctl? The correct returns are either
+ * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
+ * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
+ * code before returning.
+ *
+ * Confused drivers sometimes return EINVAL, which is wrong. It
+ * means "I understood the ioctl command, but the parameters to
+ * it were wrong".
+ *
+ * We should aim to just fix the broken drivers, the EINVAL case
+ * should go away.
+ */
+static inline int is_unrecognized_ioctl(int ret)
+{
+ return ret == -EINVAL ||
+ ret == -ENOTTY ||
+ ret == -ENOIOCTLCMD;
+}
+
+/*
* always keep this in sync with compat_blkdev_ioctl()
*/
int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
@@ -196,8 +216,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return -EACCES;
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- /* -EINVAL to handle old uncorrected drivers */
- if (ret != -EINVAL && ret != -ENOTTY)
+ if (!is_unrecognized_ioctl(ret))
return ret;
fsync_bdev(bdev);
@@ -206,8 +225,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKROSET:
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
- /* -EINVAL to handle old uncorrected drivers */
- if (ret != -EINVAL && ret != -ENOTTY)
+ if (!is_unrecognized_ioctl(ret))
return ret;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -278,6 +296,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return put_uint(arg, bdev_discard_zeroes_data(bdev));
case BLKSECTGET:
return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
+ case BLKROTATIONAL:
+ return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
case BLKRASET:
case BLKFRASET:
if(!capable(CAP_SYS_ADMIN))
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 06389e9ef96d..413a0b1d788c 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -94,9 +94,7 @@ static struct elevator_type elevator_noop = {
static int __init noop_init(void)
{
- elv_register(&elevator_noop);
-
- return 0;
+ return elv_register(&elevator_noop);
}
static void __exit noop_exit(void)
diff --git a/fs/partitions/check.c b/block/partition-generic.c
index e3c63d1c5e13..d06ec1c829c2 100644
--- a/fs/partitions/check.c
+++ b/block/partition-generic.c
@@ -1,6 +1,4 @@
/*
- * fs/partitions/check.c
- *
* Code extracted from drivers/block/genhd.c
* Copyright (C) 1991-1998 Linus Torvalds
* Re-organised Feb 1998 Russell King
@@ -9,8 +7,6 @@
* block drivers, which allows all the partition code to
* be grouped in one location, and it to be mostly self
* contained.
- *
- * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
*/
#include <linux/init.h>
@@ -22,98 +18,11 @@
#include <linux/genhd.h>
#include <linux/blktrace_api.h>
-#include "check.h"
-
-#include "acorn.h"
-#include "amiga.h"
-#include "atari.h"
-#include "ldm.h"
-#include "mac.h"
-#include "msdos.h"
-#include "osf.h"
-#include "sgi.h"
-#include "sun.h"
-#include "ibm.h"
-#include "ultrix.h"
-#include "efi.h"
-#include "karma.h"
-#include "sysv68.h"
+#include "partitions/check.h"
#ifdef CONFIG_BLK_DEV_MD
extern void md_autodetect_dev(dev_t dev);
#endif
-
-int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
-
-static int (*check_part[])(struct parsed_partitions *) = {
- /*
- * Probe partition formats with tables at disk address 0
- * that also have an ADFS boot block at 0xdc0.
- */
-#ifdef CONFIG_ACORN_PARTITION_ICS
- adfspart_check_ICS,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_POWERTEC
- adfspart_check_POWERTEC,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_EESOX
- adfspart_check_EESOX,
-#endif
-
- /*
- * Now move on to formats that only have partition info at
- * disk address 0xdc0. Since these may also have stale
- * PC/BIOS partition tables, they need to come before
- * the msdos entry.
- */
-#ifdef CONFIG_ACORN_PARTITION_CUMANA
- adfspart_check_CUMANA,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ADFS
- adfspart_check_ADFS,
-#endif
-
-#ifdef CONFIG_EFI_PARTITION
- efi_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_SGI_PARTITION
- sgi_partition,
-#endif
-#ifdef CONFIG_LDM_PARTITION
- ldm_partition, /* this must come before msdos */
-#endif
-#ifdef CONFIG_MSDOS_PARTITION
- msdos_partition,
-#endif
-#ifdef CONFIG_OSF_PARTITION
- osf_partition,
-#endif
-#ifdef CONFIG_SUN_PARTITION
- sun_partition,
-#endif
-#ifdef CONFIG_AMIGA_PARTITION
- amiga_partition,
-#endif
-#ifdef CONFIG_ATARI_PARTITION
- atari_partition,
-#endif
-#ifdef CONFIG_MAC_PARTITION
- mac_partition,
-#endif
-#ifdef CONFIG_ULTRIX_PARTITION
- ultrix_partition,
-#endif
-#ifdef CONFIG_IBM_PARTITION
- ibm_partition,
-#endif
-#ifdef CONFIG_KARMA_PARTITION
- karma_partition,
-#endif
-#ifdef CONFIG_SYSV68_PARTITION
- sysv68_partition,
-#endif
- NULL
-};
/*
* disk_name() is used by partition check code and the genhd driver.
@@ -155,65 +64,6 @@ const char *__bdevname(dev_t dev, char *buffer)
EXPORT_SYMBOL(__bdevname);
-static struct parsed_partitions *
-check_partition(struct gendisk *hd, struct block_device *bdev)
-{
- struct parsed_partitions *state;
- int i, res, err;
-
- state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
- if (!state)
- return NULL;
- state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
- if (!state->pp_buf) {
- kfree(state);
- return NULL;
- }
- state->pp_buf[0] = '\0';
-
- state->bdev = bdev;
- disk_name(hd, 0, state->name);
- snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
- if (isdigit(state->name[strlen(state->name)-1]))
- sprintf(state->name, "p");
-
- state->limit = disk_max_parts(hd);
- i = res = err = 0;
- while (!res && check_part[i]) {
- memset(&state->parts, 0, sizeof(state->parts));
- res = check_part[i++](state);
- if (res < 0) {
- /* We have hit an I/O error which we don't report now.
- * But record it, and let the others do their job.
- */
- err = res;
- res = 0;
- }
-
- }
- if (res > 0) {
- printk(KERN_INFO "%s", state->pp_buf);
-
- free_page((unsigned long)state->pp_buf);
- return state;
- }
- if (state->access_beyond_eod)
- err = -ENOSPC;
- if (err)
- /* The partition is unrecognized. So report I/O errors if there were any */
- res = err;
- if (!res)
- strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
- else if (warn_no_part)
- strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
-
- printk(KERN_INFO "%s", state->pp_buf);
-
- free_page((unsigned long)state->pp_buf);
- kfree(state);
- return ERR_PTR(res);
-}
-
static ssize_t part_partition_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
diff --git a/fs/partitions/Kconfig b/block/partitions/Kconfig
index cb5f0a3f1b03..cb5f0a3f1b03 100644
--- a/fs/partitions/Kconfig
+++ b/block/partitions/Kconfig
diff --git a/fs/partitions/Makefile b/block/partitions/Makefile
index 03af8eac51da..03af8eac51da 100644
--- a/fs/partitions/Makefile
+++ b/block/partitions/Makefile
diff --git a/fs/partitions/acorn.c b/block/partitions/acorn.c
index fbeb697374d5..fbeb697374d5 100644
--- a/fs/partitions/acorn.c
+++ b/block/partitions/acorn.c
diff --git a/fs/partitions/acorn.h b/block/partitions/acorn.h
index ede828529692..ede828529692 100644
--- a/fs/partitions/acorn.h
+++ b/block/partitions/acorn.h
diff --git a/fs/partitions/amiga.c b/block/partitions/amiga.c
index 70cbf44a1560..70cbf44a1560 100644
--- a/fs/partitions/amiga.c
+++ b/block/partitions/amiga.c
diff --git a/fs/partitions/amiga.h b/block/partitions/amiga.h
index d094585cadaa..d094585cadaa 100644
--- a/fs/partitions/amiga.h
+++ b/block/partitions/amiga.h
diff --git a/fs/partitions/atari.c b/block/partitions/atari.c
index 9875b05e80a2..9875b05e80a2 100644
--- a/fs/partitions/atari.c
+++ b/block/partitions/atari.c
diff --git a/fs/partitions/atari.h b/block/partitions/atari.h
index fe2d32a89f36..fe2d32a89f36 100644
--- a/fs/partitions/atari.h
+++ b/block/partitions/atari.h
diff --git a/block/partitions/check.c b/block/partitions/check.c
new file mode 100644
index 000000000000..bc908672c976
--- /dev/null
+++ b/block/partitions/check.c
@@ -0,0 +1,166 @@
+/*
+ * fs/partitions/check.c
+ *
+ * Code extracted from drivers/block/genhd.c
+ * Copyright (C) 1991-1998 Linus Torvalds
+ * Re-organised Feb 1998 Russell King
+ *
+ * We now have independent partition support from the
+ * block drivers, which allows all the partition code to
+ * be grouped in one location, and it to be mostly self
+ * contained.
+ *
+ * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
+ */
+
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/genhd.h>
+
+#include "check.h"
+
+#include "acorn.h"
+#include "amiga.h"
+#include "atari.h"
+#include "ldm.h"
+#include "mac.h"
+#include "msdos.h"
+#include "osf.h"
+#include "sgi.h"
+#include "sun.h"
+#include "ibm.h"
+#include "ultrix.h"
+#include "efi.h"
+#include "karma.h"
+#include "sysv68.h"
+
+int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
+
+static int (*check_part[])(struct parsed_partitions *) = {
+ /*
+ * Probe partition formats with tables at disk address 0
+ * that also have an ADFS boot block at 0xdc0.
+ */
+#ifdef CONFIG_ACORN_PARTITION_ICS
+ adfspart_check_ICS,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_POWERTEC
+ adfspart_check_POWERTEC,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_EESOX
+ adfspart_check_EESOX,
+#endif
+
+ /*
+ * Now move on to formats that only have partition info at
+ * disk address 0xdc0. Since these may also have stale
+ * PC/BIOS partition tables, they need to come before
+ * the msdos entry.
+ */
+#ifdef CONFIG_ACORN_PARTITION_CUMANA
+ adfspart_check_CUMANA,
+#endif
+#ifdef CONFIG_ACORN_PARTITION_ADFS
+ adfspart_check_ADFS,
+#endif
+
+#ifdef CONFIG_EFI_PARTITION
+ efi_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_SGI_PARTITION
+ sgi_partition,
+#endif
+#ifdef CONFIG_LDM_PARTITION
+ ldm_partition, /* this must come before msdos */
+#endif
+#ifdef CONFIG_MSDOS_PARTITION
+ msdos_partition,
+#endif
+#ifdef CONFIG_OSF_PARTITION
+ osf_partition,
+#endif
+#ifdef CONFIG_SUN_PARTITION
+ sun_partition,
+#endif
+#ifdef CONFIG_AMIGA_PARTITION
+ amiga_partition,
+#endif
+#ifdef CONFIG_ATARI_PARTITION
+ atari_partition,
+#endif
+#ifdef CONFIG_MAC_PARTITION
+ mac_partition,
+#endif
+#ifdef CONFIG_ULTRIX_PARTITION
+ ultrix_partition,
+#endif
+#ifdef CONFIG_IBM_PARTITION
+ ibm_partition,
+#endif
+#ifdef CONFIG_KARMA_PARTITION
+ karma_partition,
+#endif
+#ifdef CONFIG_SYSV68_PARTITION
+ sysv68_partition,
+#endif
+ NULL
+};
+
+struct parsed_partitions *
+check_partition(struct gendisk *hd, struct block_device *bdev)
+{
+ struct parsed_partitions *state;
+ int i, res, err;
+
+ state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
+ if (!state)
+ return NULL;
+ state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!state->pp_buf) {
+ kfree(state);
+ return NULL;
+ }
+ state->pp_buf[0] = '\0';
+
+ state->bdev = bdev;
+ disk_name(hd, 0, state->name);
+ snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
+ if (isdigit(state->name[strlen(state->name)-1]))
+ sprintf(state->name, "p");
+
+ state->limit = disk_max_parts(hd);
+ i = res = err = 0;
+ while (!res && check_part[i]) {
+ memset(&state->parts, 0, sizeof(state->parts));
+ res = check_part[i++](state);
+ if (res < 0) {
+ /* We have hit an I/O error which we don't report now.
+ * But record it, and let the others do their job.
+ */
+ err = res;
+ res = 0;
+ }
+
+ }
+ if (res > 0) {
+ printk(KERN_INFO "%s", state->pp_buf);
+
+ free_page((unsigned long)state->pp_buf);
+ return state;
+ }
+ if (state->access_beyond_eod)
+ err = -ENOSPC;
+ if (err)
+ /* The partition is unrecognized. So report I/O errors if there were any */
+ res = err;
+ if (!res)
+ strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
+ else if (warn_no_part)
+ strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
+
+ printk(KERN_INFO "%s", state->pp_buf);
+
+ free_page((unsigned long)state->pp_buf);
+ kfree(state);
+ return ERR_PTR(res);
+}
diff --git a/fs/partitions/check.h b/block/partitions/check.h
index d68bf4dc3bc2..52b100311ec3 100644
--- a/fs/partitions/check.h
+++ b/block/partitions/check.h
@@ -22,6 +22,9 @@ struct parsed_partitions {
char *pp_buf;
};
+struct parsed_partitions *
+check_partition(struct gendisk *, struct block_device *);
+
static inline void *read_part_sector(struct parsed_partitions *state,
sector_t n, Sector *p)
{
diff --git a/fs/partitions/efi.c b/block/partitions/efi.c
index 6296b403c67a..6296b403c67a 100644
--- a/fs/partitions/efi.c
+++ b/block/partitions/efi.c
diff --git a/fs/partitions/efi.h b/block/partitions/efi.h
index b69ab729558f..b69ab729558f 100644
--- a/fs/partitions/efi.h
+++ b/block/partitions/efi.h
diff --git a/fs/partitions/ibm.c b/block/partitions/ibm.c
index d513a07f44bb..d513a07f44bb 100644
--- a/fs/partitions/ibm.c
+++ b/block/partitions/ibm.c
diff --git a/fs/partitions/ibm.h b/block/partitions/ibm.h
index 08fb0804a812..08fb0804a812 100644
--- a/fs/partitions/ibm.h
+++ b/block/partitions/ibm.h
diff --git a/fs/partitions/karma.c b/block/partitions/karma.c
index 0ea19312706b..0ea19312706b 100644
--- a/fs/partitions/karma.c
+++ b/block/partitions/karma.c
diff --git a/fs/partitions/karma.h b/block/partitions/karma.h
index c764b2e9df21..c764b2e9df21 100644
--- a/fs/partitions/karma.h
+++ b/block/partitions/karma.h
diff --git a/fs/partitions/ldm.c b/block/partitions/ldm.c
index bd8ae788f689..e507cfbd044e 100644
--- a/fs/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -2,7 +2,7 @@
* ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
*
* Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
- * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov
* Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
*
* Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads
@@ -1341,20 +1341,17 @@ found:
ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
return false;
}
-
if (f->map & (1 << rec)) {
ldm_error ("Duplicate VBLK, part %d.", rec);
f->map &= 0x7F; /* Mark the group as broken */
return false;
}
-
f->map |= (1 << rec);
-
+ if (!rec)
+ memcpy(f->data, data, VBLK_SIZE_HEAD);
data += VBLK_SIZE_HEAD;
size -= VBLK_SIZE_HEAD;
-
- memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
-
+ memcpy(f->data + VBLK_SIZE_HEAD + rec * size, data, size);
return true;
}
diff --git a/fs/partitions/ldm.h b/block/partitions/ldm.h
index 374242c0971a..374242c0971a 100644
--- a/fs/partitions/ldm.h
+++ b/block/partitions/ldm.h
diff --git a/fs/partitions/mac.c b/block/partitions/mac.c
index 11f688bd76c5..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/block/partitions/mac.c
diff --git a/fs/partitions/mac.h b/block/partitions/mac.h
index 3c7d98436380..3c7d98436380 100644
--- a/fs/partitions/mac.h
+++ b/block/partitions/mac.h
diff --git a/fs/partitions/msdos.c b/block/partitions/msdos.c
index 5f79a6677c69..5f79a6677c69 100644
--- a/fs/partitions/msdos.c
+++ b/block/partitions/msdos.c
diff --git a/fs/partitions/msdos.h b/block/partitions/msdos.h
index 38c781c490b3..38c781c490b3 100644
--- a/fs/partitions/msdos.h
+++ b/block/partitions/msdos.h
diff --git a/fs/partitions/osf.c b/block/partitions/osf.c
index 764b86a01965..764b86a01965 100644
--- a/fs/partitions/osf.c
+++ b/block/partitions/osf.c
diff --git a/fs/partitions/osf.h b/block/partitions/osf.h
index 20ed2315ec16..20ed2315ec16 100644
--- a/fs/partitions/osf.h
+++ b/block/partitions/osf.h
diff --git a/fs/partitions/sgi.c b/block/partitions/sgi.c
index ea8a86dceaf4..ea8a86dceaf4 100644
--- a/fs/partitions/sgi.c
+++ b/block/partitions/sgi.c
diff --git a/fs/partitions/sgi.h b/block/partitions/sgi.h
index b9553ebdd5a9..b9553ebdd5a9 100644
--- a/fs/partitions/sgi.h
+++ b/block/partitions/sgi.h
diff --git a/fs/partitions/sun.c b/block/partitions/sun.c
index b5b6fcfb3d36..b5b6fcfb3d36 100644
--- a/fs/partitions/sun.c
+++ b/block/partitions/sun.c
diff --git a/fs/partitions/sun.h b/block/partitions/sun.h
index 2424baa8319f..2424baa8319f 100644
--- a/fs/partitions/sun.h
+++ b/block/partitions/sun.h
diff --git a/fs/partitions/sysv68.c b/block/partitions/sysv68.c
index 9627ccffc1c4..9627ccffc1c4 100644
--- a/fs/partitions/sysv68.c
+++ b/block/partitions/sysv68.c
diff --git a/fs/partitions/sysv68.h b/block/partitions/sysv68.h
index bf2f5ffa97ac..bf2f5ffa97ac 100644
--- a/fs/partitions/sysv68.h
+++ b/block/partitions/sysv68.h
diff --git a/fs/partitions/ultrix.c b/block/partitions/ultrix.c
index 8dbaf9f77a99..8dbaf9f77a99 100644
--- a/fs/partitions/ultrix.c
+++ b/block/partitions/ultrix.c
diff --git a/fs/partitions/ultrix.h b/block/partitions/ultrix.h
index a3cc00b2bded..a3cc00b2bded 100644
--- a/fs/partitions/ultrix.h
+++ b/block/partitions/ultrix.h
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index fbdf0d802ec4..260fa80ef575 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -24,6 +24,7 @@
#include <linux/capability.h>
#include <linux/completion.h>
#include <linux/cdrom.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/times.h>
#include <asm/uaccess.h>
@@ -690,6 +691,57 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
}
EXPORT_SYMBOL(scsi_cmd_ioctl);
+int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
+{
+ if (bd && bd == bd->bd_contains)
+ return 0;
+
+ /* Actually none of these is particularly useful on a partition,
+ * but they are safe.
+ */
+ switch (cmd) {
+ case SCSI_IOCTL_GET_IDLUN:
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ case SCSI_IOCTL_GET_PCI:
+ case SCSI_IOCTL_PROBE_HOST:
+ case SG_GET_VERSION_NUM:
+ case SG_SET_TIMEOUT:
+ case SG_GET_TIMEOUT:
+ case SG_GET_RESERVED_SIZE:
+ case SG_SET_RESERVED_SIZE:
+ case SG_EMULATED_HOST:
+ return 0;
+ case CDROM_GET_CAPABILITY:
+ /* Keep this until we remove the printk below. udev sends it
+ * and we do not want to spam dmesg about it. CD-ROMs do
+ * not have partitions, so we get here only for disks.
+ */
+ return -ENOIOCTLCMD;
+ default:
+ break;
+ }
+
+ /* In particular, rule out all resets and host-specific ioctls. */
+ printk_ratelimited(KERN_WARNING
+ "%s: sending ioctl %x to a partition!\n", current->comm, cmd);
+
+ return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+}
+EXPORT_SYMBOL(scsi_verify_blk_ioctl);
+
+int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
+ unsigned int cmd, void __user *arg)
+{
+ int ret;
+
+ ret = scsi_verify_blk_ioctl(bd, cmd);
+ if (ret < 0)
+ return ret;
+
+ return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
+}
+EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
+
static int __init blk_scsi_ioctl_init(void)
{
blk_set_cmd_filter_defaults(&blk_default_cmd_filter);