summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2014-09-11 17:31:18 +0200
committerJens Axboe <axboe@fb.com>2014-09-11 17:31:18 +0200
commitb207892b061da7608878e273ae22ba9bf9be264b (patch)
tree51daa46b89b83cad422941f52110b19571b85b79 /block
parentbdi: reimplement bdev_inode_switch_bdi() (diff)
parentblk-mq: scale depth and rq map appropriate if low on memory (diff)
downloadlinux-b207892b061da7608878e273ae22ba9bf9be264b.tar.xz
linux-b207892b061da7608878e273ae22ba9bf9be264b.zip
Merge branch 'for-linus' into for-3.18/core
A bit of churn on the for-linus side that would be nice to have in the core bits for 3.18, so pull it in to catch us up and make forward progress easier. Signed-off-by: Jens Axboe <axboe@fb.com> Conflicts: block/scsi_ioctl.c
Diffstat (limited to 'block')
-rw-r--r--block/bio-integrity.c2
-rw-r--r--block/blk-core.c1
-rw-r--r--block/blk-merge.c17
-rw-r--r--block/blk-mq.c127
-rw-r--r--block/blk-sysfs.c6
-rw-r--r--block/cfq-iosched.c19
-rw-r--r--block/genhd.c24
-rw-r--r--block/partition-generic.c2
-rw-r--r--block/scsi_ioctl.c36
9 files changed, 163 insertions, 71 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bc423f7b02da..f14b4abbebd8 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -520,7 +520,7 @@ void bio_integrity_endio(struct bio *bio, int error)
*/
if (error) {
bio->bi_end_io = bip->bip_end_io;
- bio_endio(bio, error);
+ bio_endio_nodec(bio, error);
return;
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 817446175489..6946a4275e6f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1248,7 +1248,6 @@ void blk_rq_set_block_pc(struct request *rq)
rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL;
memset(rq->__cmd, 0, sizeof(rq->__cmd));
- rq->cmd = rq->__cmd;
}
EXPORT_SYMBOL(blk_rq_set_block_pc);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 54535831f1e1..77881798f793 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -10,10 +10,11 @@
#include "blk.h"
static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
- struct bio *bio)
+ struct bio *bio,
+ bool no_sg_merge)
{
struct bio_vec bv, bvprv = { NULL };
- int cluster, high, highprv = 1, no_sg_merge;
+ int cluster, high, highprv = 1;
unsigned int seg_size, nr_phys_segs;
struct bio *fbio, *bbio;
struct bvec_iter iter;
@@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
cluster = blk_queue_cluster(q);
seg_size = 0;
nr_phys_segs = 0;
- no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
high = 0;
for_each_bio(bio) {
bio_for_each_segment(bv, bio, iter) {
@@ -88,18 +88,23 @@ new_segment:
void blk_recalc_rq_segments(struct request *rq)
{
- rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
+ bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
+ &rq->q->queue_flags);
+
+ rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
+ no_sg_merge);
}
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
- if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags))
+ if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
+ bio->bi_vcnt < queue_max_segments(q))
bio->bi_phys_segments = bio->bi_vcnt;
else {
struct bio *nxt = bio->bi_next;
bio->bi_next = NULL;
- bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+ bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
bio->bi_next = nxt;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 940aa8a34b70..067e600002d3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -112,18 +112,22 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
*/
void blk_mq_freeze_queue(struct request_queue *q)
{
+ bool freeze;
+
spin_lock_irq(q->queue_lock);
- q->mq_freeze_depth++;
+ freeze = !q->mq_freeze_depth++;
spin_unlock_irq(q->queue_lock);
- percpu_ref_kill(&q->mq_usage_counter);
- blk_mq_run_queues(q, false);
+ if (freeze) {
+ percpu_ref_kill(&q->mq_usage_counter);
+ blk_mq_run_queues(q, false);
+ }
wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
}
static void blk_mq_unfreeze_queue(struct request_queue *q)
{
- bool wake = false;
+ bool wake;
spin_lock_irq(q->queue_lock);
wake = !--q->mq_freeze_depth;
@@ -172,6 +176,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
/* tag was already set */
rq->errors = 0;
+ rq->cmd = rq->__cmd;
+
rq->extra_len = 0;
rq->sense_len = 0;
rq->resid_len = 0;
@@ -1072,13 +1078,17 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
blk_account_io_start(rq, 1);
}
+static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
+{
+ return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
+ !blk_queue_nomerges(hctx->queue);
+}
+
static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx,
struct request *rq, struct bio *bio)
{
- struct request_queue *q = hctx->queue;
-
- if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
+ if (!hctx_allow_merges(hctx)) {
blk_mq_bio_to_request(rq, bio);
spin_lock(&ctx->lock);
insert_rq:
@@ -1086,6 +1096,8 @@ insert_rq:
spin_unlock(&ctx->lock);
return false;
} else {
+ struct request_queue *q = hctx->queue;
+
spin_lock(&ctx->lock);
if (!blk_mq_attempt_merge(q, ctx, bio)) {
blk_mq_bio_to_request(rq, bio);
@@ -1313,6 +1325,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
continue;
set->ops->exit_request(set->driver_data, tags->rqs[i],
hctx_idx, i);
+ tags->rqs[i] = NULL;
}
}
@@ -1346,8 +1359,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
INIT_LIST_HEAD(&tags->page_list);
- tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
- GFP_KERNEL, set->numa_node);
+ tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
+ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+ set->numa_node);
if (!tags->rqs) {
blk_mq_free_tags(tags);
return NULL;
@@ -1371,8 +1385,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
this_order--;
do {
- page = alloc_pages_node(set->numa_node, GFP_KERNEL,
- this_order);
+ page = alloc_pages_node(set->numa_node,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+ this_order);
if (page)
break;
if (!this_order--)
@@ -1396,8 +1411,10 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
if (set->ops->init_request) {
if (set->ops->init_request(set->driver_data,
tags->rqs[i], hctx_idx, i,
- set->numa_node))
+ set->numa_node)) {
+ tags->rqs[i] = NULL;
goto fail;
+ }
}
p += rq_size;
@@ -1408,7 +1425,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
return tags;
fail:
- pr_warn("%s: failed to allocate requests\n", __func__);
blk_mq_free_rq_map(set, tags, hctx_idx);
return NULL;
}
@@ -1578,7 +1594,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
hctx->tags = set->tags[i];
/*
- * Allocate space for all possible cpus to avoid allocation in
+ * Allocate space for all possible cpus to avoid allocation at
* runtime
*/
hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
@@ -1666,8 +1682,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i) {
/*
- * If not software queues are mapped to this hardware queue,
- * disable it and free the request entries
+ * If no software queues are mapped to this hardware queue,
+ * disable it and free the request entries.
*/
if (!hctx->nr_ctx) {
struct blk_mq_tag_set *set = q->tag_set;
@@ -1717,14 +1733,10 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
- blk_mq_freeze_queue(q);
-
mutex_lock(&set->tag_list_lock);
list_del_init(&q->tag_set_list);
blk_mq_update_tag_set_depth(set);
mutex_unlock(&set->tag_list_lock);
-
- blk_mq_unfreeze_queue(q);
}
static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
@@ -1932,6 +1944,61 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
return NOTIFY_OK;
}
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+ int i;
+
+ for (i = 0; i < set->nr_hw_queues; i++) {
+ set->tags[i] = blk_mq_init_rq_map(set, i);
+ if (!set->tags[i])
+ goto out_unwind;
+ }
+
+ return 0;
+
+out_unwind:
+ while (--i >= 0)
+ blk_mq_free_rq_map(set, set->tags[i], i);
+
+ set->tags = NULL;
+ return -ENOMEM;
+}
+
+/*
+ * Allocate the request maps associated with this tag_set. Note that this
+ * may reduce the depth asked for, if memory is tight. set->queue_depth
+ * will be updated to reflect the allocated depth.
+ */
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
+{
+ unsigned int depth;
+ int err;
+
+ depth = set->queue_depth;
+ do {
+ err = __blk_mq_alloc_rq_maps(set);
+ if (!err)
+ break;
+
+ set->queue_depth >>= 1;
+ if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
+ err = -ENOMEM;
+ break;
+ }
+ } while (set->queue_depth);
+
+ if (!set->queue_depth || err) {
+ pr_err("blk-mq: failed to allocate request map\n");
+ return -ENOMEM;
+ }
+
+ if (depth != set->queue_depth)
+ pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
+ depth, set->queue_depth);
+
+ return 0;
+}
+
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
@@ -1940,8 +2007,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
*/
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
- int i;
-
if (!set->nr_hw_queues)
return -EINVAL;
if (!set->queue_depth)
@@ -1962,23 +2027,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
- goto out;
+ return -ENOMEM;
- for (i = 0; i < set->nr_hw_queues; i++) {
- set->tags[i] = blk_mq_init_rq_map(set, i);
- if (!set->tags[i])
- goto out_unwind;
- }
+ if (blk_mq_alloc_rq_maps(set))
+ goto enomem;
mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list);
return 0;
-
-out_unwind:
- while (--i >= 0)
- blk_mq_free_rq_map(set, set->tags[i], i);
-out:
+enomem:
+ kfree(set->tags);
+ set->tags = NULL;
return -ENOMEM;
}
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -1993,6 +2053,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
}
kfree(set->tags);
+ set->tags = NULL;
}
EXPORT_SYMBOL(blk_mq_free_tag_set);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 4db5abf96b9e..17f5c84ce7bf 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -554,8 +554,10 @@ int blk_register_queue(struct gendisk *disk)
* Initialization must be complete by now. Finish the initial
* bypass from queue allocation.
*/
- queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
- blk_queue_bypass_end(q);
+ if (!blk_queue_init_done(q)) {
+ queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
+ blk_queue_bypass_end(q);
+ }
ret = blk_trace_init_sysfs(dev);
if (ret)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 900f569afcc5..6f2751d305de 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1272,15 +1272,22 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
rb_insert_color(&cfqg->rb_node, &st->rb);
}
+/*
+ * This has to be called only on activation of cfqg
+ */
static void
cfq_update_group_weight(struct cfq_group *cfqg)
{
- BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
-
if (cfqg->new_weight) {
cfqg->weight = cfqg->new_weight;
cfqg->new_weight = 0;
}
+}
+
+static void
+cfq_update_group_leaf_weight(struct cfq_group *cfqg)
+{
+ BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
if (cfqg->new_leaf_weight) {
cfqg->leaf_weight = cfqg->new_leaf_weight;
@@ -1299,7 +1306,12 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
/* add to the service tree */
BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
- cfq_update_group_weight(cfqg);
+ /*
+ * Update leaf_weight. We cannot update weight at this point
+ * because cfqg might already have been activated and is
+ * contributing its current weight to the parent's child_weight.
+ */
+ cfq_update_group_leaf_weight(cfqg);
__cfq_group_service_tree_add(st, cfqg);
/*
@@ -1323,6 +1335,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
*/
while ((parent = cfqg_parent(pos))) {
if (propagate) {
+ cfq_update_group_weight(pos);
propagate = !parent->nr_active++;
parent->children_weight += pos->weight;
}
diff --git a/block/genhd.c b/block/genhd.c
index 791f41943132..09da5e4a8e03 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -28,10 +28,10 @@ struct kobject *block_depr;
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
-/* For extended devt allocation. ext_devt_mutex prevents look up
+/* For extended devt allocation. ext_devt_lock prevents look up
* results from going away underneath its user.
*/
-static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_SPINLOCK(ext_devt_lock);
static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type;
@@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
}
/* allocate ext devt */
- mutex_lock(&ext_devt_mutex);
- idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL);
- mutex_unlock(&ext_devt_mutex);
+ idr_preload(GFP_KERNEL);
+
+ spin_lock(&ext_devt_lock);
+ idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
+ spin_unlock(&ext_devt_lock);
+
+ idr_preload_end();
if (idx < 0)
return idx == -ENOSPC ? -EBUSY : idx;
@@ -447,9 +451,9 @@ void blk_free_devt(dev_t devt)
return;
if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
- mutex_lock(&ext_devt_mutex);
+ spin_lock(&ext_devt_lock);
idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
- mutex_unlock(&ext_devt_mutex);
+ spin_unlock(&ext_devt_lock);
}
}
@@ -665,7 +669,6 @@ void del_gendisk(struct gendisk *disk)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk));
- blk_free_devt(disk_to_dev(disk)->devt);
}
EXPORT_SYMBOL(del_gendisk);
@@ -690,13 +693,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
} else {
struct hd_struct *part;
- mutex_lock(&ext_devt_mutex);
+ spin_lock(&ext_devt_lock);
part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
if (part && get_disk(part_to_disk(part))) {
*partno = part->partno;
disk = part_to_disk(part);
}
- mutex_unlock(&ext_devt_mutex);
+ spin_unlock(&ext_devt_lock);
}
return disk;
@@ -1098,6 +1101,7 @@ static void disk_release(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
+ blk_free_devt(dev->devt);
disk_release_events(disk);
kfree(disk->random);
disk_replace_part_tbl(disk, NULL);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 789cdea05893..0d9e5f97f0a8 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
struct hd_struct *p = dev_to_part(dev);
+ blk_free_devt(dev->devt);
free_part_stats(p);
free_part_info(p);
kfree(p);
@@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno)
rcu_assign_pointer(ptbl->last_lookup, NULL);
kobject_put(part->holder_dir);
device_del(part_to_dev(part));
- blk_free_devt(part_devt(part));
hd_struct_put(part);
}
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a8b0d0208448..abb2e65b24cc 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -279,7 +279,6 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
r = blk_rq_unmap_user(bio);
if (!ret)
ret = r;
- blk_put_request(rq);
return ret;
}
@@ -297,8 +296,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
if (hdr->interface_id != 'S')
return -EINVAL;
- if (hdr->cmd_len > BLK_MAX_CDB)
- return -EINVAL;
if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
return -EIO;
@@ -317,16 +314,23 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
if (hdr->flags & SG_FLAG_Q_AT_HEAD)
at_head = 1;
+ ret = -ENOMEM;
rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
if (IS_ERR(rq))
return PTR_ERR(rq);
blk_rq_set_block_pc(rq);
- if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
- blk_put_request(rq);
- return -EFAULT;
+ if (hdr->cmd_len > BLK_MAX_CDB) {
+ rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
+ if (!rq->cmd)
+ goto out_put_request;
}
+ ret = -EFAULT;
+ if (blk_fill_sghdr_rq(q, rq, hdr, mode))
+ goto out_free_cdb;
+
+ ret = 0;
if (hdr->iovec_count) {
size_t iov_data_len;
struct iovec *iov = NULL;
@@ -335,7 +339,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
0, NULL, &iov);
if (ret < 0) {
kfree(iov);
- goto out;
+ goto out_free_cdb;
}
iov_data_len = ret;
@@ -358,7 +362,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
GFP_KERNEL);
if (ret)
- goto out;
+ goto out_free_cdb;
bio = rq->bio;
memset(sense, 0, sizeof(sense));
@@ -376,8 +380,12 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
hdr->duration = jiffies_to_msecs(jiffies - start_time);
- return blk_complete_sghdr_rq(rq, hdr, bio);
-out:
+ ret = blk_complete_sghdr_rq(rq, hdr, bio);
+
+out_free_cdb:
+ if (rq->cmd != rq->__cmd)
+ kfree(rq->cmd);
+out_put_request:
blk_put_request(rq);
return ret;
}
@@ -450,8 +458,9 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto error_free_buffer;
+ goto error;
}
+ blk_rq_set_block_pc(rq);
cmdlen = COMMAND_SIZE(opcode);
@@ -505,7 +514,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
memset(sense, 0, sizeof(sense));
rq->sense = sense;
rq->sense_len = 0;
- blk_rq_set_block_pc(rq);
blk_execute_rq(q, disk, rq, 0);
@@ -524,9 +532,9 @@ out:
}
error:
- blk_put_request(rq);
-error_free_buffer:
kfree(buffer);
+ if (rq)
+ blk_put_request(rq);
return err;
}
EXPORT_SYMBOL_GPL(sg_scsi_ioctl);