summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-05-15 03:34:19 +0200
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-05-15 03:34:19 +0200
commit 4f8b6f25eb1e51febd426da764a0b0ea652ad238 (patch)
tree 8a737e8d167786e06c3edd3d0eb966c7ca162daa
parent Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi (diff)
parent dm-delay: remove timer_lock (diff)
download linux-4f8b6f25eb1e51febd426da764a0b0ea652ad238.tar.xz
linux-4f8b6f25eb1e51febd426da764a0b0ea652ad238.zip
Merge tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - Add a dm-crypt optional "high_priority" flag that enables the crypt
   workqueues to use WQ_HIGHPRI.

 - Export dm-crypt workqueues via sysfs (by enabling WQ_SYSFS) to allow
   for improved visibility and controls over IO and crypt workqueues.

 - Fix dm-crypt to no longer constrain max_segment_size to PAGE_SIZE.
   This limit isn't needed given that the block core provides late bio
   splitting if bio exceeds underlying limits (e.g. max_segment_size).

 - Fix dm-crypt crypt_queue's use of WQ_UNBOUND to not use
   WQ_CPU_INTENSIVE because it is meaningless with WQ_UNBOUND.

 - Fix various issues with dm-delay target (ranging from a resource
   teardown fix, a fix for hung task when using kthread mode, and other
   improvements that followed from code inspection).

* tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-delay: remove timer_lock
  dm-delay: change locking to avoid contention
  dm-delay: fix max_delay calculations
  dm-delay: fix hung task introduced by kthread mode
  dm-delay: fix workqueue delay_timer race
  dm-crypt: don't set WQ_CPU_INTENSIVE for WQ_UNBOUND crypt_queue
  dm: use queue_limits_set
  dm-crypt: stop constraining max_segment_size to PAGE_SIZE
  dm-crypt: export sysfs of all workqueues
  dm-crypt: add the optional "high_priority" flag
-rw-r--r-- Documentation/admin-guide/device-mapper/dm-crypt.rst 5
-rw-r--r-- drivers/md/dm-crypt.c 73
-rw-r--r-- drivers/md/dm-delay.c 60
-rw-r--r-- drivers/md/dm-table.c 27
4 files changed, 97 insertions, 68 deletions
diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
index aa2d04d95df6..41f5f57f00eb 100644
--- a/Documentation/admin-guide/device-mapper/dm-crypt.rst
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -113,6 +113,11 @@ same_cpu_crypt
The default is to use an unbound workqueue so that encryption work
is automatically balanced between available CPUs.
+high_priority
+ Set dm-crypt workqueues and the writer thread to high priority. This
+ improves throughput and latency of dm-crypt while degrading general
+ responsiveness of the system.
+
submit_from_crypt_cpus
Disable offloading writes to a separate thread after encryption.
There are some situations where offloading write bios from the
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9a74c6316c5d..1b7a97cc3779 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -47,6 +47,8 @@
#define DM_MSG_PREFIX "crypt"
+static DEFINE_IDA(workqueue_ida);
+
/*
* context holding the current state of a multi-part conversion
*/
@@ -137,9 +139,9 @@ struct iv_elephant_private {
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
- DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
- DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
- DM_CRYPT_WRITE_INLINE };
+ DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY,
+ DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE,
+ DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE };
enum cipher_flags {
CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */
@@ -184,6 +186,7 @@ struct crypt_config {
struct crypto_aead **tfms_aead;
} cipher_tfm;
unsigned int tfms_count;
+ int workqueue_id;
unsigned long cipher_flags;
/*
@@ -1653,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
/*
* Generate a new unfragmented bio with the given size
- * This should never violate the device limitations (but only because
- * max_segment_size is being constrained to PAGE_SIZE).
+ * This should never violate the device limitations (but if it did then block
+ * core should split the bio as needed).
*
* This function may be called concurrently. If we allocate from the mempool
* concurrently, there is a possibility of deadlock. For example, if we have
@@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti)
if (cc->crypt_queue)
destroy_workqueue(cc->crypt_queue);
+ if (cc->workqueue_id)
+ ida_free(&workqueue_ida, cc->workqueue_id);
+
crypt_free_tfms(cc);
bioset_exit(&cc->bs);
@@ -3134,7 +3140,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
struct crypt_config *cc = ti->private;
struct dm_arg_set as;
static const struct dm_arg _args[] = {
- {0, 8, "Invalid number of feature args"},
+ {0, 9, "Invalid number of feature args"},
};
unsigned int opt_params, val;
const char *opt_string, *sval;
@@ -3161,6 +3167,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
else if (!strcasecmp(opt_string, "same_cpu_crypt"))
set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+ else if (!strcasecmp(opt_string, "high_priority"))
+ set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
@@ -3230,8 +3238,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
const char *devname = dm_table_device_name(ti->table);
- int key_size;
+ int key_size, wq_id;
unsigned int align_mask;
+ unsigned int common_wq_flags;
unsigned long long tmpll;
int ret;
size_t iv_size_padding, additional_req_size;
@@ -3398,20 +3407,38 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->tag_pool_max_sectors <<= cc->sector_shift;
}
+ wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL);
+ if (wq_id < 0) {
+ ti->error = "Couldn't get workqueue id";
+ ret = wq_id;
+ goto bad;
+ }
+ cc->workqueue_id = wq_id;
+
ret = -ENOMEM;
- cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
+ common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS;
+ if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
+ common_wq_flags |= WQ_HIGHPRI;
+
+ cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
- if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
- 1, devname);
- else
- cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
- num_online_cpus(), devname);
+ if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) {
+ cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
+ common_wq_flags | WQ_CPU_INTENSIVE,
+ 1, devname, wq_id);
+ } else {
+ /*
+ * While crypt_queue is certainly CPU intensive, the use of
+ * WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND.
+ */
+ cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
+ common_wq_flags | WQ_UNBOUND,
+ num_online_cpus(), devname, wq_id);
+ }
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
@@ -3427,6 +3454,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Couldn't spawn write thread";
goto bad;
}
+ if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
+ set_user_nice(cc->write_thread, MIN_NICE);
ti->num_flush_bios = 1;
ti->limit_swap_bios = true;
@@ -3547,6 +3576,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
num_feature_args += !!ti->num_discard_bios;
num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+ num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
@@ -3560,6 +3590,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT(" allow_discards");
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
DMEMIT(" same_cpu_crypt");
+ if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
+ DMEMIT(" high_priority");
if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
DMEMIT(" submit_from_crypt_cpus");
if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
@@ -3579,6 +3611,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
DMEMIT_TARGET_NAME_VERSION(ti->type);
DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
+ DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n');
DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
'y' : 'n');
DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
@@ -3688,14 +3721,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct crypt_config *cc = ti->private;
- /*
- * Unfortunate constraint that is required to avoid the potential
- * for exceeding underlying device's max_segments limits -- due to
- * crypt_alloc_buffer() possibly allocating pages for the encryption
- * bio that are not as physically contiguous as the original bio.
- */
- limits->max_segment_size = PAGE_SIZE;
-
limits->logical_block_size =
max_t(unsigned int, limits->logical_block_size, cc->sector_size);
limits->physical_block_size =
@@ -3706,7 +3731,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 25, 0},
+ .version = {1, 26, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 5eabdb06c649..08f6387620c1 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -28,7 +28,8 @@ struct delay_class {
struct delay_c {
struct timer_list delay_timer;
- struct mutex timer_lock;
+ struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
+ spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
@@ -49,8 +50,6 @@ struct dm_delay_info {
unsigned long expires;
};
-static DEFINE_MUTEX(delayed_bios_lock);
-
static void handle_delayed_timer(struct timer_list *t)
{
struct delay_c *dc = from_timer(dc, t, delay_timer);
@@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t)
static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
- mutex_lock(&dc->timer_lock);
-
- if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
- mod_timer(&dc->delay_timer, expires);
-
- mutex_unlock(&dc->timer_lock);
+ timer_reduce(&dc->delay_timer, expires);
}
static inline bool delay_is_fast(struct delay_c *dc)
@@ -89,12 +83,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
struct bio_list flush_bio_list;
+ LIST_HEAD(local_list);
unsigned long next_expires = 0;
bool start_timer = false;
bio_list_init(&flush_bio_list);
- mutex_lock(&delayed_bios_lock);
- list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ mutex_lock(&dc->process_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
+ list_replace_init(&dc->delayed_bios, &local_list);
+ spin_unlock(&dc->delayed_bios_lock);
+ list_for_each_entry_safe(delayed, next, &local_list, list) {
cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
@@ -114,7 +112,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
}
}
}
- mutex_unlock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
+ list_splice(&local_list, &dc->delayed_bios);
+ spin_unlock(&dc->delayed_bios_lock);
+ mutex_unlock(&dc->process_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
@@ -128,13 +129,13 @@ static int flush_worker_fn(void *data)
while (!kthread_should_stop()) {
flush_delayed_bios(dc, false);
- mutex_lock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
if (unlikely(list_empty(&dc->delayed_bios))) {
set_current_state(TASK_INTERRUPTIBLE);
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
schedule();
} else {
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
cond_resched();
}
}
@@ -154,8 +155,10 @@ static void delay_dtr(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- if (dc->kdelayd_wq)
+ if (dc->kdelayd_wq) {
+ timer_shutdown_sync(&dc->delay_timer);
destroy_workqueue(dc->kdelayd_wq);
+ }
if (dc->read.dev)
dm_put_device(ti, dc->read.dev);
@@ -166,7 +169,7 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
- mutex_destroy(&dc->timer_lock);
+ mutex_destroy(&dc->process_bios_lock);
kfree(dc);
}
@@ -224,7 +227,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
- mutex_init(&dc->timer_lock);
+ mutex_init(&dc->process_bios_lock);
+ spin_lock_init(&dc->delayed_bios_lock);
dc->may_delay = true;
dc->argc = argc;
@@ -240,19 +244,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->flush, argv);
if (ret)
goto bad;
- max_delay = max(max_delay, dc->write.delay);
- max_delay = max(max_delay, dc->flush.delay);
goto out;
}
ret = delay_class_ctr(ti, &dc->write, argv + 3);
if (ret)
goto bad;
+ max_delay = max(max_delay, dc->write.delay);
+
if (argc == 6) {
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
if (ret)
goto bad;
- max_delay = max(max_delay, dc->flush.delay);
goto out;
}
@@ -267,8 +270,7 @@ out:
* In case of small requested delays, use kthread instead of
* timers and workqueue to achieve better latency.
*/
- dc->worker = kthread_create(&flush_worker_fn, dc,
- "dm-delay-flush-worker");
+ dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
dc->worker = NULL;
@@ -309,14 +311,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->context = dc;
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
- mutex_lock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
if (unlikely(!dc->may_delay)) {
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
return DM_MAPIO_REMAPPED;
}
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
if (delay_is_fast(dc))
wake_up_process(dc->worker);
@@ -330,12 +332,12 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- mutex_lock(&delayed_bios_lock);
+ spin_lock(&dc->delayed_bios_lock);
dc->may_delay = false;
- mutex_unlock(&delayed_bios_lock);
+ spin_unlock(&dc->delayed_bios_lock);
if (!delay_is_fast(dc))
- del_timer_sync(&dc->delay_timer);
+ timer_delete(&dc->delay_timer);
flush_delayed_bios(dc, true);
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2c6fbd87363f..cc66a27c363a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1963,26 +1963,27 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
bool wc = false, fua = false;
int r;
- /*
- * Copy table's limits to the DM device's request_queue
- */
- q->limits = *limits;
-
if (dm_table_supports_nowait(t))
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
else
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
if (!dm_table_supports_discards(t)) {
- q->limits.max_discard_sectors = 0;
- q->limits.max_hw_discard_sectors = 0;
- q->limits.discard_granularity = 0;
- q->limits.discard_alignment = 0;
- q->limits.discard_misaligned = 0;
+ limits->max_hw_discard_sectors = 0;
+ limits->discard_granularity = 0;
+ limits->discard_alignment = 0;
+ limits->discard_misaligned = 0;
}
+ if (!dm_table_supports_write_zeroes(t))
+ limits->max_write_zeroes_sectors = 0;
+
if (!dm_table_supports_secure_erase(t))
- q->limits.max_secure_erase_sectors = 0;
+ limits->max_secure_erase_sectors = 0;
+
+ r = queue_limits_set(q, limits);
+ if (r)
+ return r;
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
@@ -2007,9 +2008,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
else
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- if (!dm_table_supports_write_zeroes(t))
- q->limits.max_write_zeroes_sectors = 0;
-
dm_table_verify_integrity(t);
/*
@@ -2048,7 +2046,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
dm_update_crypto_profile(q, t);
- disk_update_readahead(t->md->disk);
/*
* Check for request-based device is left to