diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-26 21:52:58 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-26 21:52:58 +0200 |
commit | 9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a (patch) | |
tree | c70bb7d65a50a51686378b6113a8663e0e60d9b8 /block/blk-cgroup.c | |
parent | Merge tag 'for-6.4/io_uring-2023-04-21' of git://git.kernel.dk/linux (diff) | |
parent | nbd: fix incomplete validation of ioctl arg (diff) | |
download | linux-9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a.tar.xz linux-9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a.zip |
Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- drbd patches, bringing us closer to unifying the out-of-tree version
and the in tree one (Andreas, Christoph)
- support for auto-quiesce for the s390 dasd driver (Stefan)
- MD pull request via Song:
- md/bitmap: Optimal last page size (Jon Derrick)
- Various raid10 fixes (Yu Kuai, Li Nan)
- md: add error_handlers for raid0 and linear (Mariusz Tkaczyk)
- NVMe pull request via Christoph:
- Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas)
- Validate nvmet module parameters (Chaitanya Kulkarni)
- Fence TCP socket on receive error (Chris Leech)
- Fix async event trace event (Keith Busch)
- Minor cleanups (Chaitanya Kulkarni, zhenwei pi)
- Fix and cleanup nvmet Identify handling (Damien Le Moal,
Christoph Hellwig)
- Fix double blk_mq_complete_request race in the timeout handler
(Lei Yin)
- Fix irq locking in nvme-fcloop (Ming Lei)
- Remove queue mapping helper for rdma devices (Sagi Grimberg)
- use structured request attribute checks for nbd (Jakub)
- fix blk-crypto race conditions between keyslot management (Eric)
- add sed-opal support for reading read locking range attributes
(Ondrej)
- make fault injection configurable for null_blk (Akinobu)
- clean up the request insertion API (Christoph)
- clean up the queue running API (Christoph)
- blkg config helper cleanups (Tejun)
- lazy init support for blk-iolatency (Tejun)
- various fixes and tweaks to ublk (Ming)
- remove hybrid polling. It hasn't really been useful since we got
async polled IO support, and these days we don't support sync polled
IO at all (Keith)
- misc fixes, cleanups, improvements (Zhong, Ondrej, Colin, Chengming,
Chaitanya, me)
* tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux: (118 commits)
nbd: fix incomplete validation of ioctl arg
ublk: don't return 0 in case of any failure
sed-opal: geometry feature reporting command
null_blk: Always check queue mode setting from configfs
block: ublk: switch to ioctl command encoding
blk-mq: fix the blk_mq_add_to_requeue_list call in blk_kick_flush
block, bfq: Fix division by zero error on zero wsum
fault-inject: fix build error when FAULT_INJECTION_CONFIGFS=y and CONFIGFS_FS=m
block: store bdev->bd_disk->fops->submit_bio state in bdev
block: re-arrange the struct block_device fields for better layout
md/raid5: remove unused working_disks variable
md/raid10: don't call bio_start_io_acct twice for bio which experienced read error
md/raid10: fix memleak of md thread
md/raid10: fix memleak for 'conf->bio_split'
md/raid10: fix leak of 'r10bio->remaining' for recovery
md/raid10: don't BUG_ON() in raise_barrier()
md: fix soft lockup in status_resync
md: add error_handlers for raid0 and linear
md: Use optimal I/O size for last bitmap page
md: Fix types in sb writer
...
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 147 |
1 files changed, 69 insertions, 78 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 18c922579719..ff45649361e7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -33,7 +33,6 @@ #include "blk-cgroup.h" #include "blk-ioprio.h" #include "blk-throttle.h" -#include "blk-rq-qos.h" /* * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. @@ -693,69 +692,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) EXPORT_SYMBOL_GPL(__blkg_prfill_u64); /** - * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update - * @inputp: input string pointer + * blkg_conf_init - initialize a blkg_conf_ctx + * @ctx: blkg_conf_ctx to initialize + * @input: input string * - * Parse the device node prefix part, MAJ:MIN, of per-blkg config update - * from @input and get and return the matching bdev. *@inputp is - * updated to point past the device node prefix. Returns an ERR_PTR() - * value on error. + * Initialize @ctx which can be used to parse blkg config input string @input. + * Once initialized, @ctx can be used with blkg_conf_open_bdev() and + * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit(). + */ +void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input) +{ + *ctx = (struct blkg_conf_ctx){ .input = input }; +} +EXPORT_SYMBOL_GPL(blkg_conf_init); + +/** + * blkg_conf_open_bdev - parse and open bdev for per-blkg config update + * @ctx: blkg_conf_ctx initialized with blkg_conf_init() * - * Use this function iff blkg_conf_prep() can't be used for some reason. + * Parse the device node prefix part, MAJ:MIN, of per-blkg config update from + * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is + * set to point past the device node prefix. + * + * This function may be called multiple times on @ctx and the extra calls become + * NOOPs. blkg_conf_prep() implicitly calls this function. Use this function + * explicitly if bdev access is needed without resolving the blkcg / policy part + * of @ctx->input. Returns -errno on error. */ -struct block_device *blkcg_conf_open_bdev(char **inputp) +int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) { - char *input = *inputp; + char *input = ctx->input; unsigned int major, minor; struct block_device *bdev; int key_len; + if (ctx->bdev) + return 0; + if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) - return ERR_PTR(-EINVAL); + return -EINVAL; input += key_len; if (!isspace(*input)) - return ERR_PTR(-EINVAL); + return -EINVAL; input = skip_spaces(input); bdev = blkdev_get_no_open(MKDEV(major, minor)); if (!bdev) - return ERR_PTR(-ENODEV); + return -ENODEV; if (bdev_is_partition(bdev)) { blkdev_put_no_open(bdev); - return ERR_PTR(-ENODEV); + return -ENODEV; } - *inputp = input; - return bdev; + ctx->body = input; + ctx->bdev = bdev; + return 0; } /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup * @pol: target policy - * @input: input string - * @ctx: blkg_conf_ctx to be filled + * @ctx: blkg_conf_ctx initialized with blkg_conf_init() + * + * Parse per-blkg config update from @ctx->input and initialize @ctx + * accordingly. On success, @ctx->body points to the part of @ctx->input + * following MAJ:MIN, @ctx->bdev points to the target block device and + * @ctx->blkg to the blkg being configured. * - * Parse per-blkg config update from @input and initialize @ctx with the - * result. @ctx->blkg points to the blkg to be updated and @ctx->body the - * part of @input following MAJ:MIN. This function returns with RCU read - * lock and queue lock held and must be paired with blkg_conf_finish(). + * blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this + * function returns with queue lock held and must be followed by + * blkg_conf_exit(). */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - char *input, struct blkg_conf_ctx *ctx) - __acquires(rcu) __acquires(&bdev->bd_queue->queue_lock) + struct blkg_conf_ctx *ctx) + __acquires(&bdev->bd_queue->queue_lock) { - struct block_device *bdev; struct gendisk *disk; struct request_queue *q; struct blkcg_gq *blkg; int ret; - bdev = blkcg_conf_open_bdev(&input); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - disk = bdev->bd_disk; + ret = blkg_conf_open_bdev(ctx); + if (ret) + return ret; + + disk = ctx->bdev->bd_disk; q = disk->queue; /* @@ -766,7 +789,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, if (ret) goto fail; - rcu_read_lock(); spin_lock_irq(&q->queue_lock); if (!blkcg_policy_enabled(q, pol)) { @@ -795,7 +817,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, /* Drop locks to do new blkg allocation with GFP_KERNEL. */ spin_unlock_irq(&q->queue_lock); - rcu_read_unlock(); new_blkg = blkg_alloc(pos, disk, GFP_KERNEL); if (unlikely(!new_blkg)) { @@ -809,7 +830,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto fail_exit_queue; } - rcu_read_lock(); spin_lock_irq(&q->queue_lock); if (!blkcg_policy_enabled(q, pol)) { @@ -836,20 +856,16 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, } success: blk_queue_exit(q); - ctx->bdev = bdev; ctx->blkg = blkg; - ctx->body = input; return 0; fail_preloaded: radix_tree_preload_end(); fail_unlock: spin_unlock_irq(&q->queue_lock); - rcu_read_unlock(); fail_exit_queue: blk_queue_exit(q); fail: - blkdev_put_no_open(bdev); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue @@ -865,20 +881,27 @@ fail: EXPORT_SYMBOL_GPL(blkg_conf_prep); /** - * blkg_conf_finish - finish up per-blkg config update - * @ctx: blkg_conf_ctx initialized by blkg_conf_prep() + * blkg_conf_exit - clean up per-blkg config update + * @ctx: blkg_conf_ctx initialized with blkg_conf_init() * - * Finish up after per-blkg config update. This function must be paired - * with blkg_conf_prep(). + * Clean up after per-blkg config update. This function must be called on all + * blkg_conf_ctx's initialized with blkg_conf_init(). */ -void blkg_conf_finish(struct blkg_conf_ctx *ctx) - __releases(&ctx->bdev->bd_queue->queue_lock) __releases(rcu) +void blkg_conf_exit(struct blkg_conf_ctx *ctx) + __releases(&ctx->bdev->bd_queue->queue_lock) { - spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock); - rcu_read_unlock(); - blkdev_put_no_open(ctx->bdev); + if (ctx->blkg) { + spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock); + ctx->blkg = NULL; + } + + if (ctx->bdev) { + blkdev_put_no_open(ctx->bdev); + ctx->body = NULL; + ctx->bdev = NULL; + } } -EXPORT_SYMBOL_GPL(blkg_conf_finish); +EXPORT_SYMBOL_GPL(blkg_conf_exit); static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src) { @@ -1289,8 +1312,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) blkcg->cpd[i] = cpd; cpd->blkcg = blkcg; cpd->plid = i; - if (pol->cpd_init_fn) - pol->cpd_init_fn(cpd); } spin_lock_init(&blkcg->lock); @@ -1368,14 +1389,8 @@ int blkcg_init_disk(struct gendisk *disk) if (ret) goto err_ioprio_exit; - ret = blk_iolatency_init(disk); - if (ret) - goto err_throtl_exit; - return 0; -err_throtl_exit: - blk_throtl_exit(disk); err_ioprio_exit: blk_ioprio_exit(disk); err_destroy_all: @@ -1391,30 +1406,9 @@ err_unlock: void blkcg_exit_disk(struct gendisk *disk) { blkg_destroy_all(disk); - rq_qos_exit(disk->queue); blk_throtl_exit(disk); } -static void blkcg_bind(struct cgroup_subsys_state *root_css) -{ - int i; - - mutex_lock(&blkcg_pol_mutex); - - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - struct blkcg *blkcg; - - if (!pol || !pol->cpd_bind_fn) - continue; - - list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) - if (blkcg->cpd[pol->plid]) - pol->cpd_bind_fn(blkcg->cpd[pol->plid]); - } - mutex_unlock(&blkcg_pol_mutex); -} - static void blkcg_exit(struct task_struct *tsk) { if (tsk->throttle_disk) @@ -1428,7 +1422,6 @@ struct cgroup_subsys io_cgrp_subsys = { .css_offline = blkcg_css_offline, .css_free = blkcg_css_free, .css_rstat_flush = blkcg_rstat_flush, - .bind = blkcg_bind, .dfl_cftypes = blkcg_files, .legacy_cftypes = blkcg_legacy_files, .legacy_name = "blkio", @@ -1666,8 +1659,6 @@ int blkcg_policy_register(struct blkcg_policy *pol) blkcg->cpd[pol->plid] = cpd; cpd->blkcg = blkcg; cpd->plid = pol->plid; - if (pol->cpd_init_fn) - pol->cpd_init_fn(cpd); } } |