summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-21 15:17:03 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-21 15:17:03 +0100
commit3c7c25038b6c7d66a6816028219914379be6a5cc (patch)
treed21fb0d5e70dc309b9cdff78985d08c081a7135a
parentMerge tag 'io_uring-5.17-2022-01-21' of git://git.kernel.dk/linux-block (diff)
parentblock: fix async_depth sysfs interface for mq-deadline (diff)
downloadlinux-3c7c25038b6c7d66a6816028219914379be6a5cc.tar.xz
linux-3c7c25038b6c7d66a6816028219914379be6a5cc.zip
Merge tag 'block-5.17-2022-01-21' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Various little minor fixes that should go into this release: - Fix issue with cloned bios and IO accounting (Christoph) - Remove redundant assignments (Colin, GuoYong) - Fix an issue with the mq-deadline async_depth sysfs interface (me) - Fix brd module loading race (Tetsuo) - Shared tag map wakeup fix (Laibin) - End of bdev read fix (OGAWA) - srcu leak fix (Ming)" * tag 'block-5.17-2022-01-21' of git://git.kernel.dk/linux-block: block: fix async_depth sysfs interface for mq-deadline block: Fix wrong offset in bio_truncate() block: assign bi_bdev for cloned bios in blk_rq_prep_clone block: cleanup q->srcu block: Remove unnecessary variable assignment brd: remove brd_devices_mutex mutex aoe: remove redundant assignment on variable n loop: remove redundant initialization of pointer node blk-mq: fix tag_get wait task can't be awakened
-rw-r--r--block/bio.c3
-rw-r--r--block/blk-mq-tag.c40
-rw-r--r--block/blk-mq.c1
-rw-r--r--block/blk-sysfs.c4
-rw-r--r--block/mq-deadline.c4
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/brd.c73
-rw-r--r--drivers/block/loop.c2
-rw-r--r--include/linux/sbitmap.h11
-rw-r--r--lib/sbitmap.c25
10 files changed, 106 insertions, 59 deletions
diff --git a/block/bio.c b/block/bio.c
index 0d400ba2dbd1..4312a8085396 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size)
offset = new_size - done;
else
offset = 0;
- zero_user(bv.bv_page, offset, bv.bv_len - offset);
+ zero_user(bv.bv_page, bv.bv_offset + offset,
+ bv.bv_len - offset);
truncated = true;
}
done += bv.bv_len;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -17,6 +17,21 @@
#include "blk-mq-tag.h"
/*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+ unsigned int users)
+{
+ if (!users)
+ return;
+
+ sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+ users);
+ sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+ users);
+}
+
+/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
* to get tag when first time, the other shared-tag users could reserve
@@ -24,18 +39,26 @@
*/
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
+ unsigned int users;
+
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
- !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+ return true;
+ }
} else {
- if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
- !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
- atomic_inc(&hctx->tags->active_queues);
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+ return true;
+ }
}
+ users = atomic_inc_return(&hctx->tags->active_queues);
+
+ blk_mq_update_wake_batch(hctx->tags, users);
+
return true;
}
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_tags *tags = hctx->tags;
+ unsigned int users;
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- atomic_dec(&tags->active_queues);
+ users = atomic_dec_return(&tags->active_queues);
+
+ blk_mq_update_wake_batch(tags, users);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a6d4780580fc..b5e35e63adad 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2976,6 +2976,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
bio = bio_clone_fast(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;
+ bio->bi_bdev = rq->q->disk->part0;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e20eadfcf5c8..9f32882ceb2f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -811,6 +811,9 @@ static void blk_release_queue(struct kobject *kobj)
bioset_exit(&q->bio_split);
+ if (blk_queue_has_srcu(q))
+ cleanup_srcu_struct(q->srcu);
+
ida_simple_remove(&blk_queue_ida, q->id);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
@@ -887,7 +890,6 @@ int blk_register_queue(struct gendisk *disk)
kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
mutex_unlock(&q->sysfs_lock);
- ret = 0;
unlock:
mutex_unlock(&q->sysfs_dir_lock);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 85d919bf60c7..3ed5eaf3446a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -865,7 +865,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->front_merges);
+SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES
@@ -895,7 +895,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
+STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 588889bea7c3..6af111f568e4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -122,7 +122,7 @@ newtag(struct aoedev *d)
register ulong n;
n = jiffies & 0xffff;
- return n |= (++d->lasttag & 0x7fff) << 16;
+ return n | (++d->lasttag & 0x7fff) << 16;
}
static u32
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 8fe2e4289dae..6e3f2f0d2352 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -362,7 +362,6 @@ __setup("ramdisk_size=", ramdisk_size);
* (should share code eventually).
*/
static LIST_HEAD(brd_devices);
-static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
static int brd_alloc(int i)
@@ -372,21 +371,14 @@ static int brd_alloc(int i)
char buf[DISK_NAME_LEN];
int err = -ENOMEM;
- mutex_lock(&brd_devices_mutex);
- list_for_each_entry(brd, &brd_devices, brd_list) {
- if (brd->brd_number == i) {
- mutex_unlock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list)
+ if (brd->brd_number == i)
return -EEXIST;
- }
- }
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
- if (!brd) {
- mutex_unlock(&brd_devices_mutex);
+ if (!brd)
return -ENOMEM;
- }
brd->brd_number = i;
list_add_tail(&brd->brd_list, &brd_devices);
- mutex_unlock(&brd_devices_mutex);
spin_lock_init(&brd->brd_lock);
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
@@ -429,9 +421,7 @@ static int brd_alloc(int i)
out_cleanup_disk:
blk_cleanup_disk(disk);
out_free_dev:
- mutex_lock(&brd_devices_mutex);
list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
kfree(brd);
return err;
}
@@ -441,15 +431,19 @@ static void brd_probe(dev_t dev)
brd_alloc(MINOR(dev) / max_part);
}
-static void brd_del_one(struct brd_device *brd)
+static void brd_cleanup(void)
{
- del_gendisk(brd->brd_disk);
- blk_cleanup_disk(brd->brd_disk);
- brd_free_pages(brd);
- mutex_lock(&brd_devices_mutex);
- list_del(&brd->brd_list);
- mutex_unlock(&brd_devices_mutex);
- kfree(brd);
+ struct brd_device *brd, *next;
+
+ debugfs_remove_recursive(brd_debugfs_dir);
+
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+ del_gendisk(brd->brd_disk);
+ blk_cleanup_disk(brd->brd_disk);
+ brd_free_pages(brd);
+ list_del(&brd->brd_list);
+ kfree(brd);
+ }
}
static inline void brd_check_and_reset_par(void)
@@ -473,9 +467,18 @@ static inline void brd_check_and_reset_par(void)
static int __init brd_init(void)
{
- struct brd_device *brd, *next;
int err, i;
+ brd_check_and_reset_par();
+
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
+ for (i = 0; i < rd_nr; i++) {
+ err = brd_alloc(i);
+ if (err)
+ goto out_free;
+ }
+
/*
* brd module now has a feature to instantiate underlying device
* structure on-demand, provided that there is an access dev node.
@@ -491,28 +494,16 @@ static int __init brd_init(void)
* dynamically.
*/
- if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe))
- return -EIO;
-
- brd_check_and_reset_par();
-
- brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
- for (i = 0; i < rd_nr; i++) {
- err = brd_alloc(i);
- if (err)
- goto out_free;
+ if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
+ err = -EIO;
+ goto out_free;
}
pr_info("brd: module loaded\n");
return 0;
out_free:
- unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module NOT loaded !!!\n");
return err;
@@ -520,13 +511,9 @@ out_free:
static void __exit brd_exit(void)
{
- struct brd_device *brd, *next;
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
- brd_del_one(brd);
+ brd_cleanup();
pr_info("brd: module unloaded\n");
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b1b05c45c07c..01cbbfc4e9e2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -820,7 +820,7 @@ static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
{
- struct rb_node **node = &(lo->worker_tree.rb_node), *parent = NULL;
+ struct rb_node **node, *parent = NULL;
struct loop_worker *cur_worker, *worker = NULL;
struct work_struct *work;
struct list_head *cmd_list;
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index fc0357a6e19b..95df357ec009 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -416,6 +416,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
}
/**
+ * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
+ * @sbq: Bitmap queue to recalculate wake batch.
+ * @users: Number of shares.
+ *
+ * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
+ * by depth. This interface is for HCTX shared tags or queue shared tags.
+ */
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users);
+
+/**
* sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
* @sbq: Bitmap queue to resize.
* @depth: New number of bits to resize to.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2709ab825499..6220fa67fb7e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
}
EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
-static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
- unsigned int depth)
+static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int wake_batch)
{
- unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
int i;
if (sbq->wake_batch != wake_batch) {
@@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
}
}
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int depth)
+{
+ unsigned int wake_batch;
+
+ wake_batch = sbq_calc_wake_batch(sbq, depth);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+
+void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
+ unsigned int users)
+{
+ unsigned int wake_batch;
+
+ wake_batch = clamp_val((sbq->sb.depth + users - 1) /
+ users, 4, SBQ_WAKE_BATCH);
+ __sbitmap_queue_update_wake_batch(sbq, wake_batch);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
+
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
{
sbitmap_queue_update_wake_batch(sbq, depth);