summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig6
-rw-r--r--block/bio.c1
-rw-r--r--block/blk-cgroup.c78
-rw-r--r--block/blk-cgroup.h15
-rw-r--r--block/blk-core.c19
-rw-r--r--block/blk-map.c7
-rw-r--r--block/blk-mq.c16
-rw-r--r--block/blk-mq.h5
-rw-r--r--block/bsg.c2
-rw-r--r--block/genhd.c28
10 files changed, 85 insertions, 92 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 0b7c13f9a089..86122e459fe0 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_CGROUP_RWSTAT
bool
+config BLK_CGROUP_PUNT_BIO
+ bool
+
config BLK_DEV_BSG_COMMON
tristate
@@ -204,9 +207,6 @@ config BLK_INLINE_ENCRYPTION_FALLBACK
source "block/partitions/Kconfig"
-config BLOCK_COMPAT
- def_bool COMPAT
-
config BLK_MQ_PCI
def_bool PCI
diff --git a/block/bio.c b/block/bio.c
index fd11614bba4d..043944fd46eb 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
return false;
return bio_add_page(bio, &folio->page, len, off) > 0;
}
+EXPORT_SYMBOL(bio_add_folio);
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0ecb4cce8af2..0ce64dd73cfe 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -55,7 +55,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
-static struct workqueue_struct *blkcg_punt_bio_wq;
#define BLKG_DESTROY_BATCH_SIZE 64
@@ -165,7 +164,9 @@ static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
+#endif
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -187,6 +188,9 @@ static void blkg_release(struct percpu_ref *ref)
call_rcu(&blkg->rcu_head, __blkg_release);
}
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
+static struct workqueue_struct *blkcg_punt_bio_wq;
+
static void blkg_async_bio_workfn(struct work_struct *work)
{
struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
@@ -197,10 +201,10 @@ static void blkg_async_bio_workfn(struct work_struct *work)
bool need_plug = false;
/* as long as there are pending bios, @blkg can't go away */
- spin_lock_bh(&blkg->async_bio_lock);
+ spin_lock(&blkg->async_bio_lock);
bio_list_merge(&bios, &blkg->async_bios);
bio_list_init(&blkg->async_bios);
- spin_unlock_bh(&blkg->async_bio_lock);
+ spin_unlock(&blkg->async_bio_lock);
/* start plug only when bio_list contains at least 2 bios */
if (bios.head && bios.head->bi_next) {
@@ -213,6 +217,40 @@ static void blkg_async_bio_workfn(struct work_struct *work)
blk_finish_plug(&plug);
}
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup. Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+
+ if (blkg->parent) {
+ spin_lock(&blkg->async_bio_lock);
+ bio_list_add(&blkg->async_bios, bio);
+ spin_unlock(&blkg->async_bio_lock);
+ queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+ } else {
+ /* never bounce for the root cgroup */
+ submit_bio(bio);
+ }
+}
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
+
+static int __init blkcg_punt_bio_init(void)
+{
+ blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE |
+ WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!blkcg_punt_bio_wq)
+ return -ENOMEM;
+ return 0;
+}
+subsys_initcall(blkcg_punt_bio_init);
+#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */
+
/**
* bio_blkcg_css - return the blkcg CSS associated with a bio
* @bio: target bio
@@ -268,10 +306,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
blkg->q = disk->queue;
INIT_LIST_HEAD(&blkg->q_node);
+ blkg->blkcg = blkcg;
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spin_lock_init(&blkg->async_bio_lock);
bio_list_init(&blkg->async_bios);
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
- blkg->blkcg = blkcg;
+#endif
u64_stats_init(&blkg->iostat.sync);
for_each_possible_cpu(cpu) {
@@ -1682,25 +1722,6 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
-bool __blkcg_punt_bio_submit(struct bio *bio)
-{
- struct blkcg_gq *blkg = bio->bi_blkg;
-
- /* consume the flag first */
- bio->bi_opf &= ~REQ_CGROUP_PUNT;
-
- /* never bounce for the root cgroup */
- if (!blkg->parent)
- return false;
-
- spin_lock_bh(&blkg->async_bio_lock);
- bio_list_add(&blkg->async_bios, bio);
- spin_unlock_bh(&blkg->async_bio_lock);
-
- queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
- return true;
-}
-
/*
* Scale the accumulated delay based on how long it has been since we updated
* the delay. We only call this when we are adding delay, in case it's been a
@@ -2079,16 +2100,5 @@ bool blk_cgroup_congested(void)
return ret;
}
-static int __init blkcg_init(void)
-{
- blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
- WQ_MEM_RECLAIM | WQ_FREEZABLE |
- WQ_UNBOUND | WQ_SYSFS, 0);
- if (!blkcg_punt_bio_wq)
- return -ENOMEM;
- return 0;
-}
-subsys_initcall(blkcg_init);
-
module_param(blkcg_debug_stats, bool, 0644);
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index d6ad3abc6eca..624c03c8fe64 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -72,9 +72,10 @@ struct blkcg_gq {
struct blkg_iostat_set iostat;
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
-
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spinlock_t async_bio_lock;
struct bio_list async_bios;
+#endif
union {
struct work_struct async_bio_work;
struct work_struct free_work;
@@ -375,16 +376,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q)))
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
- if (bio->bi_opf & REQ_CGROUP_PUNT)
- return __blkcg_punt_bio_submit(bio);
- else
- return false;
-}
-
static inline void blkcg_bio_issue_init(struct bio *bio)
{
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
@@ -506,8 +497,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 06bdb352568c..00c74330fa92 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -824,9 +824,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
*/
void submit_bio(struct bio *bio)
{
- if (blkcg_punt_bio_submit(bio))
- return;
-
if (bio_op(bio) == REQ_OP_READ) {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, bio_sectors(bio));
@@ -953,16 +950,11 @@ again:
}
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time)
{
- const int sgrp = op_stat_group(op);
-
part_stat_lock();
update_io_ticks(bdev, start_time, false);
- part_stat_inc(bdev, ios[sgrp]);
- part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -978,13 +970,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time)
+ unsigned int sectors, unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
@@ -992,6 +983,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
part_stat_lock();
update_io_ticks(bdev, now, true);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -1001,7 +994,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev)
{
- bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
diff --git a/block/blk-map.c b/block/blk-map.c
index 9137d16cecdc..04c55f1c492e 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -29,10 +29,11 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
if (!bmd)
return NULL;
- memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
bmd->iter = *data;
- if (iter_is_iovec(data))
- bmd->iter.iov = bmd->iov;
+ if (iter_is_iovec(data)) {
+ memcpy(bmd->iov, iter_iov(data), sizeof(struct iovec) * data->nr_segs);
+ bmd->iter.__iov = bmd->iov;
+ }
return bmd;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2d297efe229..f6dad0886a2f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1333,8 +1333,6 @@ bool blk_rq_is_poll(struct request *rq)
return false;
if (rq->mq_hctx->type != HCTX_TYPE_POLL)
return false;
- if (WARN_ON_ONCE(!rq->bio))
- return false;
return true;
}
EXPORT_SYMBOL_GPL(blk_rq_is_poll);
@@ -1342,7 +1340,7 @@ EXPORT_SYMBOL_GPL(blk_rq_is_poll);
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
do {
- bio_poll(rq->bio, NULL, 0);
+ blk_mq_poll(rq->q, blk_rq_to_qc(rq), NULL, 0);
cond_resched();
} while (!completion_done(wait));
}
@@ -2711,6 +2709,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
struct blk_mq_hw_ctx *this_hctx = NULL;
struct blk_mq_ctx *this_ctx = NULL;
struct request *requeue_list = NULL;
+ struct request **requeue_lastp = &requeue_list;
unsigned int depth = 0;
LIST_HEAD(list);
@@ -2721,10 +2720,10 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
this_hctx = rq->mq_hctx;
this_ctx = rq->mq_ctx;
} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
- rq_list_add(&requeue_list, rq);
+ rq_list_add_tail(&requeue_lastp, rq);
continue;
}
- list_add_tail(&rq->queuelist, &list);
+ list_add(&rq->queuelist, &list);
depth++;
} while (!rq_list_empty(plug->mq_list));
@@ -2875,16 +2874,15 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
if (!plug)
return NULL;
+ rq = rq_list_peek(&plug->cached_rq);
+ if (!rq || rq->q != q)
+ return NULL;
if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
*bio = NULL;
return NULL;
}
- rq = rq_list_peek(&plug->cached_rq);
- if (!rq || rq->q != q)
- return NULL;
-
type = blk_mq_get_hctx_type((*bio)->bi_opf);
hctx_type = rq->mq_hctx->type;
if (type != hctx_type &&
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f882677ff106..e876584d3516 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -433,12 +433,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \
do { \
if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \
+ struct blk_mq_tag_set *__tag_set = (q)->tag_set; \
int srcu_idx; \
\
might_sleep_if(check_sleep); \
- srcu_idx = srcu_read_lock((q)->tag_set->srcu); \
+ srcu_idx = srcu_read_lock(__tag_set->srcu); \
(dispatch_ops); \
- srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \
+ srcu_read_unlock(__tag_set->srcu, srcu_idx); \
} else { \
rcu_read_lock(); \
(dispatch_ops); \
diff --git a/block/bsg.c b/block/bsg.c
index 30fcc865ef4f..7eca43f33d7f 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -245,7 +245,7 @@ static int __init bsg_init(void)
dev_t devid;
int ret;
- bsg_class = class_create(THIS_MODULE, "bsg");
+ bsg_class = class_create("bsg");
if (IS_ERR(bsg_class))
return PTR_ERR(bsg_class);
bsg_class->devnode = bsg_devnode;
diff --git a/block/genhd.c b/block/genhd.c
index 9fa4a7cd978c..1cb489b927d5 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -363,7 +363,6 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
if (disk->open_partitions)
return -EBUSY;
- set_bit(GD_NEED_PART_SCAN, &disk->state);
/*
* If the device is opened exclusively by current thread already, it's
* safe to scan partitons, otherwise, use bd_prepare_to_claim() to
@@ -376,12 +375,19 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
return ret;
}
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
ret = PTR_ERR(bdev);
else
blkdev_put(bdev, mode & ~FMODE_EXCL);
+ /*
+ * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
+ * and this will cause that re-assemble partitioned raid device will
+ * creat partition for underlying disk.
+ */
+ clear_bit(GD_NEED_PART_SCAN, &disk->state);
if (!(mode & FMODE_EXCL))
bd_abort_claiming(disk->part0, disk_scan_partitions);
return ret;
@@ -464,12 +470,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
if (ret)
goto out_device_del;
- if (!sysfs_deprecated) {
- ret = sysfs_create_link(block_depr, &ddev->kobj,
- kobject_name(&ddev->kobj));
- if (ret)
- goto out_device_del;
- }
+ ret = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
+ if (ret)
+ goto out_device_del;
/*
* avoid probable deadlock caused by allocating memory with
@@ -546,8 +550,7 @@ out_put_slave_dir:
out_put_holder_dir:
kobject_put(disk->part0->bd_holder_dir);
out_del_block_link:
- if (!sysfs_deprecated)
- sysfs_remove_link(block_depr, dev_name(ddev));
+ sysfs_remove_link(block_depr, dev_name(ddev));
out_device_del:
device_del(ddev);
out_free_ext_minor:
@@ -648,8 +651,7 @@ void del_gendisk(struct gendisk *disk)
part_stat_set_all(disk->part0, 0);
disk->part0->bd_stamp = 0;
- if (!sysfs_deprecated)
- sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
+ sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk));
@@ -894,7 +896,6 @@ static int __init genhd_device_init(void)
{
int error;
- block_class.dev_kobj = sysfs_dev_block_kobj;
error = class_register(&block_class);
if (unlikely(error))
return error;
@@ -903,8 +904,7 @@ static int __init genhd_device_init(void)
register_blkdev(BLOCK_EXT_MAJOR, "blkext");
/* create top-level block dir */
- if (!sysfs_deprecated)
- block_depr = kobject_create_and_add("block", NULL);
+ block_depr = kobject_create_and_add("block", NULL);
return 0;
}