summaryrefslogtreecommitdiffstats
path: root/drivers/block/null_blk.h
diff options
context:
space:
mode:
authorDamien Le Moal <damien.lemoal@wdc.com>2020-11-20 02:55:14 +0100
committerJens Axboe <axboe@kernel.dk>2020-12-08 01:36:03 +0100
commit2b8b7ed7f3fc2b1536a0add3941ae159529d23bd (patch)
treecf22ceaac1c8766b333359129a9cac8a33bb9202 /drivers/block/null_blk.h
parentblock: Align max_hw_sectors to logical blocksize (diff)
downloadlinux-2b8b7ed7f3fc2b1536a0add3941ae159529d23bd.tar.xz
linux-2b8b7ed7f3fc2b1536a0add3941ae159529d23bd.zip
null_blk: improve zone locking
With memory backing disabled, using a single spinlock for protecting zone information and zone resource management prevents the parallel execution on multiple queue of IO requests to different zones. Furthermore, regardless of the use of memory backing, if a null_blk device is created without limits on the number of open and active zones, accounting for zone resource management is not necessary. >From these observations, zone locking is changed as follows to improve performance: 1) the zone_lock spinlock is renamed zone_res_lock and used only if zone resource management is necessary, that is, if either zone_max_open or zone_max_active are not 0. This is indicated using the new boolean need_zone_res_mgmt in the nullb_device structure. null_zone_write() is modified to reduce the amount of code executed with the zone_res_lock spinlock held. 2) With memory backing disabled, per zone locking is changed to a spinlock per zone. 3) Introduce the structure nullb_zone to replace the use of struct blk_zone for zone information. This new structure includes a union of a spinlock and a mutex for zone locking. The spinlock is used when memory backing is disabled and the mutex is used with memory backing. With these changes, fio performance with zonemode=zbd for 4K random read and random write on a dual socket (24 cores per socket) machine using the none schedulder is as follows: before patch: write (psync x 96 jobs) = 465 KIOPS read (libaio@qd=8 x 96 jobs) = 1361 KIOPS after patch: write (psync x 96 jobs) = 456 KIOPS read (libaio@qd=8 x 96 jobs) = 4096 KIOPS Write performance remains mostly unchanged but read performance is three times higher. Performance when using the mq-deadline scheduler is not changed by this patch as mq-deadline becomes the bottleneck for a multi-queue device. Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block/null_blk.h')
-rw-r--r--drivers/block/null_blk.h28
1 files changed, 25 insertions, 3 deletions
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index c24d9b5ad81a..14546ead1d66 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -12,6 +12,8 @@
#include <linux/configfs.h>
#include <linux/badblocks.h>
#include <linux/fault-inject.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
struct nullb_cmd {
struct request *rq;
@@ -32,6 +34,26 @@ struct nullb_queue {
struct nullb_cmd *cmds;
};
+struct nullb_zone {
+ /*
+ * Zone lock to prevent concurrent modification of a zone write
+ * pointer position and condition: with memory backing, a write
+ * command execution may sleep on memory allocation. For this case,
+ * use mutex as the zone lock. Otherwise, use the spinlock for
+ * locking the zone.
+ */
+ union {
+ spinlock_t spinlock;
+ struct mutex mutex;
+ };
+ enum blk_zone_type type;
+ enum blk_zone_cond cond;
+ sector_t start;
+ sector_t wp;
+ unsigned int len;
+ unsigned int capacity;
+};
+
struct nullb_device {
struct nullb *nullb;
struct config_item item;
@@ -45,10 +67,10 @@ struct nullb_device {
unsigned int nr_zones_imp_open;
unsigned int nr_zones_exp_open;
unsigned int nr_zones_closed;
- struct blk_zone *zones;
+ struct nullb_zone *zones;
sector_t zone_size_sects;
- spinlock_t zone_lock;
- unsigned long *zone_locks;
+ bool need_zone_res_mgmt;
+ spinlock_t zone_res_lock;
unsigned long size; /* device size in MB */
unsigned long completion_nsec; /* time in ns to complete a request */