author    Linus Torvalds <torvalds@linux-foundation.org>  2024-07-15 23:20:22 +0200
committer Linus Torvalds <torvalds@linux-foundation.org>  2024-07-15 23:20:22 +0200
commit    3e7819886281e077e82006fe4804b0d6b0f5643b (patch)
tree      40766af623d8a1dde0edaee8b6abc496efbcc615 /drivers
parent    Merge tag 'for-6.11/io_uring-20240714' of git://git.kernel.dk/linux (diff)
parent    floppy: add missing MODULE_DESCRIPTION() macro (diff)
Merge tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - NVMe updates via Keith:
     - Device initialization memory leak fixes (Keith)
     - More constants defined (Weiwen)
     - Target debugfs support (Hannes)
     - PCIe subsystem reset enhancements (Keith)
     - Queue-depth multipath policy (Redhat and PureStorage)
     - Implement get_unique_id (Christoph)
     - Authentication error fixes (Gaosheng)

 - MD updates via Song:
     - sync_action fix and refactoring (Yu Kuai)
     - Various small fixes (Christoph Hellwig, Li Nan, Ofir Gal, Yu Kuai,
       Benjamin Marzinski, Christophe JAILLET, Yang Li)

 - Fix loop detach/open race (Gulam)

 - Fix lower control limit for blk-throttle (Yu)

 - Add module descriptions to various drivers (Jeff)

 - Add support for atomic writes for block devices, and statx reporting
   for same. Includes SCSI and NVMe (John, Prasad, Alan); see the
   userspace sketch after the diffstat below

 - Add IO priority information to block trace points (Dongliang)

 - Various zone improvements and tweaks (Damien)

 - mq-deadline tag reservation improvements (Bart)

 - Ignore direct reclaim swap writes in writeback throttling (Baokun)

 - Block integrity improvements and fixes (Anuj)

 - Add basic support for rust based block drivers. Has a dummy null_blk
   variant for now (Andreas)

 - Series converting driver settings to queue limits, and cleanups and
   fixes related to that (Christoph)

 - Cleanup for poking too deeply into the bvec internals, in preparation
   for DMA mapping API changes (Christoph)

 - Various minor tweaks and fixes (Jiapeng, John, Kanchan, Mikulas,
   Ming, Zhu, Damien, Christophe, Chaitanya)

* tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux: (206 commits)
  floppy: add missing MODULE_DESCRIPTION() macro
  loop: add missing MODULE_DESCRIPTION() macro
  ublk_drv: add missing MODULE_DESCRIPTION() macro
  xen/blkback: add missing MODULE_DESCRIPTION() macro
  block/rnbd: Constify struct kobj_type
  block: take offset into account in blk_bvec_map_sg again
  block: fix get_max_segment_size() warning
  loop: Don't bother validating blocksize
  virtio_blk: Don't bother validating blocksize
  null_blk: Don't bother validating blocksize
  block: Validate logical block size in blk_validate_limits()
  virtio_blk: Fix default logical block size fallback
  nvmet-auth: fix nvmet_auth hash error handling
  nvme: implement ->get_unique_id
  block: pass a phys_addr_t to get_max_segment_size
  block: add a bvec_phys helper
  blk-lib: check for kill signal in ioctl BLKZEROOUT
  block: limit the Write Zeroes to manually writing zeroes fallback
  block: refacto blkdev_issue_zeroout
  block: move read-only and supported checks into (__)blkdev_issue_zeroout
  ...
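Most of the driver churn in this pull follows one mechanical pattern: queue flags and cache settings that used to be poked onto a live queue after allocation (blk_queue_flag_set(), blk_queue_write_cache()) now travel in a struct queue_limits handed to blk_mq_alloc_disk(), and runtime changes go through queue_limits_start_update()/queue_limits_commit_update(). A minimal sketch of that pattern, using a hypothetical "foo" driver that is not part of this series; the helpers and BLK_FEAT_* flags are the ones visible in the diffs below:

#include <linux/blk-mq.h>

struct foo_device {
	struct blk_mq_tag_set tag_set;
	struct gendisk *disk;
};

/* Declare features and limits up front instead of toggling queue flags. */
static int foo_alloc_disk(struct foo_device *foo)
{
	struct queue_limits lim = {
		.logical_block_size	= SECTOR_SIZE,
		.features		= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
	};
	struct gendisk *disk;

	disk = blk_mq_alloc_disk(&foo->tag_set, &lim, foo);
	if (IS_ERR(disk))
		return PTR_ERR(disk);
	foo->disk = disk;
	return 0;
}

/* Runtime changes commit atomically, as in virtio_blk's cache_type_store(). */
static int foo_disable_write_cache(struct foo_device *foo)
{
	struct queue_limits lim;
	int err;

	lim = queue_limits_start_update(foo->disk->queue);
	lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
	blk_mq_freeze_queue(foo->disk->queue);
	err = queue_limits_commit_update(foo->disk->queue, &lim);
	blk_mq_unfreeze_queue(foo->disk->queue);
	return err;
}

The point of the conversion is that limits and features are validated and applied in one atomic commit rather than flag-by-flag on a live queue, which is also what lets blk_validate_limits() take over block size validation from the individual drivers below.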
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/ata/libata-scsi.c | 3
-rw-r--r--  drivers/ata/pata_macio.c | 4
-rw-r--r--  drivers/block/Kconfig | 9
-rw-r--r--  drivers/block/Makefile | 3
-rw-r--r--  drivers/block/amiflop.c | 6
-rw-r--r--  drivers/block/aoe/aoeblk.c | 1
-rw-r--r--  drivers/block/ataflop.c | 6
-rw-r--r--  drivers/block/brd.c | 7
-rw-r--r--  drivers/block/drbd/drbd_main.c | 6
-rw-r--r--  drivers/block/floppy.c | 4
-rw-r--r--  drivers/block/loop.c | 184
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c | 2
-rw-r--r--  drivers/block/n64cart.c | 2
-rw-r--r--  drivers/block/nbd.c | 26
-rw-r--r--  drivers/block/null_blk/main.c | 29
-rw-r--r--  drivers/block/null_blk/null_blk.h | 1
-rw-r--r--  drivers/block/null_blk/zoned.c | 15
-rw-r--r--  drivers/block/pktcdvd.c | 1
-rw-r--r--  drivers/block/ps3disk.c | 8
-rw-r--r--  drivers/block/rbd.c | 15
-rw-r--r--  drivers/block/rnbd/rnbd-clt-sysfs.c | 2
-rw-r--r--  drivers/block/rnbd/rnbd-clt.c | 16
-rw-r--r--  drivers/block/rnbd/rnbd-srv-sysfs.c | 4
-rw-r--r--  drivers/block/rnull.rs | 73
-rw-r--r--  drivers/block/sunvdc.c | 1
-rw-r--r--  drivers/block/swim.c | 5
-rw-r--r--  drivers/block/swim3.c | 5
-rw-r--r--  drivers/block/ublk_drv.c | 22
-rw-r--r--  drivers/block/virtio_blk.c | 68
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 1
-rw-r--r--  drivers/block/xen-blkfront.c | 73
-rw-r--r--  drivers/block/z2ram.c | 1
-rw-r--r--  drivers/block/zram/zram_drv.c | 6
-rw-r--r--  drivers/cdrom/cdrom.c | 1
-rw-r--r--  drivers/cdrom/gdrom.c | 1
-rw-r--r--  drivers/md/bcache/super.c | 13
-rw-r--r--  drivers/md/dm-cache-target.c | 1
-rw-r--r--  drivers/md/dm-clone-target.c | 1
-rw-r--r--  drivers/md/dm-core.h | 1
-rw-r--r--  drivers/md/dm-crypt.c | 4
-rw-r--r--  drivers/md/dm-integrity.c | 47
-rw-r--r--  drivers/md/dm-raid.c | 2
-rw-r--r--  drivers/md/dm-table.c | 351
-rw-r--r--  drivers/md/dm-zone.c | 254
-rw-r--r--  drivers/md/dm-zoned-target.c | 2
-rw-r--r--  drivers/md/dm.c | 174
-rw-r--r--  drivers/md/dm.h | 4
-rw-r--r--  drivers/md/md-bitmap.c | 6
-rw-r--r--  drivers/md/md-cluster.c | 2
-rw-r--r--  drivers/md/md.c | 630
-rw-r--r--  drivers/md/md.h | 136
-rw-r--r--  drivers/md/raid0.c | 30
-rw-r--r--  drivers/md/raid1.c | 34
-rw-r--r--  drivers/md/raid10.c | 23
-rw-r--r--  drivers/md/raid5.c | 114
-rw-r--r--  drivers/mmc/core/block.c | 42
-rw-r--r--  drivers/mmc/core/queue.c | 20
-rw-r--r--  drivers/mmc/core/queue.h | 3
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 9
-rw-r--r--  drivers/nvdimm/btt.c | 17
-rw-r--r--  drivers/nvdimm/pmem.c | 14
-rw-r--r--  drivers/nvme/host/Kconfig | 1
-rw-r--r--  drivers/nvme/host/apple.c | 32
-rw-r--r--  drivers/nvme/host/constants.c | 2
-rw-r--r--  drivers/nvme/host/core.c | 286
-rw-r--r--  drivers/nvme/host/fabrics.c | 25
-rw-r--r--  drivers/nvme/host/fabrics.h | 1
-rw-r--r--  drivers/nvme/host/fault_inject.c | 2
-rw-r--r--  drivers/nvme/host/fc.c | 55
-rw-r--r--  drivers/nvme/host/multipath.c | 144
-rw-r--r--  drivers/nvme/host/nvme.h | 28
-rw-r--r--  drivers/nvme/host/pci.c | 47
-rw-r--r--  drivers/nvme/host/pr.c | 10
-rw-r--r--  drivers/nvme/host/rdma.c | 34
-rw-r--r--  drivers/nvme/host/tcp.c | 31
-rw-r--r--  drivers/nvme/host/zns.c | 3
-rw-r--r--  drivers/nvme/target/Kconfig | 10
-rw-r--r--  drivers/nvme/target/Makefile | 1
-rw-r--r--  drivers/nvme/target/admin-cmd.c | 24
-rw-r--r--  drivers/nvme/target/auth.c | 14
-rw-r--r--  drivers/nvme/target/core.c | 76
-rw-r--r--  drivers/nvme/target/debugfs.c | 202
-rw-r--r--  drivers/nvme/target/debugfs.h | 42
-rw-r--r--  drivers/nvme/target/discovery.c | 14
-rw-r--r--  drivers/nvme/target/fabrics-cmd-auth.c | 16
-rw-r--r--  drivers/nvme/target/fabrics-cmd.c | 36
-rw-r--r--  drivers/nvme/target/fc.c | 33
-rw-r--r--  drivers/nvme/target/fcloop.c | 11
-rw-r--r--  drivers/nvme/target/io-cmd-bdev.c | 28
-rw-r--r--  drivers/nvme/target/loop.c | 5
-rw-r--r--  drivers/nvme/target/nvmet.h | 12
-rw-r--r--  drivers/nvme/target/passthru.c | 10
-rw-r--r--  drivers/nvme/target/rdma.c | 22
-rw-r--r--  drivers/nvme/target/tcp.c | 18
-rw-r--r--  drivers/nvme/target/zns.c | 30
-rw-r--r--  drivers/s390/block/dasd_genhd.c | 1
-rw-r--r--  drivers/s390/block/dcssblk.c | 2
-rw-r--r--  drivers/s390/block/scm_blk.c | 5
-rw-r--r--  drivers/scsi/Kconfig | 1
-rw-r--r--  drivers/scsi/iscsi_tcp.c | 8
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvmet.c | 11
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_base.c | 2
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 6
-rw-r--r--  drivers/scsi/scsi_debug.c | 588
-rw-r--r--  drivers/scsi/scsi_lib.c | 9
-rw-r--r--  drivers/scsi/scsi_trace.c | 22
-rw-r--r--  drivers/scsi/sd.c | 373
-rw-r--r--  drivers/scsi/sd.h | 31
-rw-r--r--  drivers/scsi/sd_dif.c | 45
-rw-r--r--  drivers/scsi/sd_zbc.c | 52
-rw-r--r--  drivers/scsi/sr.c | 42
-rw-r--r--  drivers/target/target_core_iblock.c | 49
-rw-r--r--  drivers/ufs/core/ufshcd.c | 10
113 files changed, 3201 insertions, 1906 deletions
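Before the per-file diffs, a userspace view of the atomic write support called out in the merge message above. This is a hedged sketch: the STATX_WRITE_ATOMIC mask, the stx_atomic_write_unit_min/max fields, and the RWF_ATOMIC flag come from the statx/pwritev2 side of the series, which lies outside this drivers/ diffstat, so treat those names as assumptions.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <unistd.h>

/*
 * Write len bytes at off as a single untorn unit, or fail. Assumes the
 * statx fields and RWF_ATOMIC from the atomic-write series; off and len
 * must respect the device's reported atomic write unit (power of two,
 * naturally aligned).
 */
static ssize_t write_atomic(int fd, const void *buf, size_t len, off_t off)
{
	struct statx stx;
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };

	if (statx(fd, "", AT_EMPTY_PATH, STATX_WRITE_ATOMIC, &stx) < 0)
		return -1;
	if (!(stx.stx_mask & STATX_WRITE_ATOMIC) ||
	    stx.stx_atomic_write_unit_max == 0)
		return -1;	/* no atomic write support here */
	if (len < stx.stx_atomic_write_unit_min ||
	    len > stx.stx_atomic_write_unit_max)
		return -1;	/* not a supported single untorn unit */

	/* RWF_ATOMIC: the write lands completely or not at all. */
	return pwritev2(fd, &iov, 1, off, RWF_ATOMIC);
}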
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index bb4d30d377ae..67ce756f8dfc 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1024,7 +1024,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
struct ata_device *dev)
{
- struct request_queue *q = sdev->request_queue;
int depth = 1;
if (!ata_id_has_unload(dev->id))
@@ -1038,7 +1037,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
sdev->sector_size = ATA_SECT_SIZE;
/* set DMA padding */
- blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
+ lim->dma_pad_mask = ATA_DMA_PAD_SZ - 1;
/* make room for appending the drain */
lim->max_segments--;
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 3cb455a32d92..1b85e8bf4ef9 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -816,7 +816,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
/* OHare has issues with non cache aligned DMA on some chipsets */
if (priv->kind == controller_ohare) {
lim->dma_alignment = 31;
- blk_queue_update_dma_pad(sdev->request_queue, 31);
+ lim->dma_pad_mask = 31;
/* Tell the world about it */
ata_dev_info(dev, "OHare alignment limits applied\n");
@@ -831,7 +831,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
if (priv->kind == controller_sh_ata6 || priv->kind == controller_k2_ata6) {
/* Allright these are bad, apply restrictions */
lim->dma_alignment = 15;
- blk_queue_update_dma_pad(sdev->request_queue, 15);
+ lim->dma_pad_mask = 15;
/* We enable MWI and hack cache line size directly here, this
* is specific to this chipset and not normal values, we happen
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5b9d4aaebb81..ed209f4f2798 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -354,6 +354,15 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.
+config BLK_DEV_RUST_NULL
+ tristate "Rust null block driver (Experimental)"
+ depends on RUST
+ help
+ This is the Rust implementation of the null block driver. For now it
+ is only a minimal stub.
+
+ If unsure, say N.
+
config BLK_DEV_RBD
tristate "Rados block device (RBD)"
depends on INET && BLOCK
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 101612cba303..1105a2d4fdcb 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,6 +9,9 @@
# needed for trace events
ccflags-y += -I$(src)
+obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
+rnull_mod-y := rnull.o
+
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a25414228e47..49ced65bef4c 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -232,6 +232,7 @@ static DEFINE_MUTEX(amiflop_mutex);
static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it doesn't identify */
module_param(fd_def_df0, ulong, 0);
+MODULE_DESCRIPTION("Amiga floppy driver");
MODULE_LICENSE("GPL");
/*
@@ -1776,10 +1777,13 @@ static const struct blk_mq_ops amiflop_mq_ops = {
static int fd_alloc_disk(int drive, int system)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
struct gendisk *disk;
int err;
- disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
+ disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index b6dac8cee70f..2028795ec61c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -337,6 +337,7 @@ aoeblk_gdalloc(void *vp)
struct queue_limits lim = {
.max_hw_sectors = aoe_maxsectors,
.io_opt = SZ_2M,
+ .features = BLK_FEAT_ROTATIONAL,
};
ulong flags;
int late = 0;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index cacc4ba942a8..4ba98c6654be 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1992,9 +1992,12 @@ static const struct blk_mq_ops ataflop_mq_ops = {
static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
struct gendisk *disk;
- disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
+ disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);
@@ -2197,4 +2200,5 @@ static void __exit atari_floppy_exit(void)
module_init(atari_floppy_init)
module_exit(atari_floppy_exit)
+MODULE_DESCRIPTION("Atari floppy driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 558d8e670566..2fd1ed101748 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -296,6 +296,7 @@ static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
+MODULE_DESCRIPTION("Ram backed block device driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
@@ -335,6 +336,8 @@ static int brd_alloc(int i)
.max_hw_discard_sectors = UINT_MAX,
.max_discard_segments = 1,
.discard_granularity = PAGE_SIZE,
+ .features = BLK_FEAT_SYNCHRONOUS |
+ BLK_FEAT_NOWAIT,
};
list_for_each_entry(brd, &brd_devices, brd_list)
@@ -366,10 +369,6 @@ static int brd_alloc(int i)
strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
- /* Tell the block layer that this is not a rotational device */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 113b441d4d36..f92673f05c7a 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2697,6 +2697,9 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
* connect.
*/
.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
+ .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_ROTATIONAL |
+ BLK_FEAT_STABLE_WRITES,
};
device = minor_to_device(minor);
@@ -2735,9 +2738,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
sprintf(disk->disk_name, "drbd%d", minor);
disk->private_data = device;
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
- blk_queue_write_cache(disk->queue, true, true);
-
device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
goto out_no_io_page;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 25c9d85667f1..3affb538b989 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4516,7 +4516,8 @@ static bool floppy_available(int drive)
static int floppy_alloc_disk(unsigned int drive, unsigned int type)
{
struct queue_limits lim = {
- .max_hw_sectors = 64,
+ .max_hw_sectors = 64,
+ .features = BLK_FEAT_ROTATIONAL,
};
struct gendisk *disk;
@@ -5016,6 +5017,7 @@ module_param(floppy, charp, 0);
module_param(FLOPPY_IRQ, int, 0);
module_param(FLOPPY_DMA, int, 0);
MODULE_AUTHOR("Alain L. Knaff");
+MODULE_DESCRIPTION("Normal floppy disk support");
MODULE_LICENSE("GPL");
/* This doesn't actually get used other than for module information */
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1153721bc7c2..78a7bb28defe 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -211,13 +211,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
if (lo->lo_state == Lo_bound)
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
- if (use_dio) {
- blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ if (use_dio)
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
- } else {
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ else
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
- }
if (lo->lo_state == Lo_bound)
blk_mq_unfreeze_queue(lo->lo_queue);
}
@@ -939,24 +936,6 @@ static void loop_free_idle_workers_timer(struct timer_list *timer)
return loop_free_idle_workers(lo, false);
}
-static void loop_update_rotational(struct loop_device *lo)
-{
- struct file *file = lo->lo_backing_file;
- struct inode *file_inode = file->f_mapping->host;
- struct block_device *file_bdev = file_inode->i_sb->s_bdev;
- struct request_queue *q = lo->lo_queue;
- bool nonrot = true;
-
- /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
- if (file_bdev)
- nonrot = bdev_nonrot(file_bdev);
-
- if (nonrot)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
-}
-
/**
* loop_set_status_from_info - configure device from loop_info
* @lo: struct loop_device to configure
@@ -998,17 +977,40 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize,
- bool update_discard_settings)
+static unsigned short loop_default_blocksize(struct loop_device *lo,
+ struct block_device *backing_bdev)
{
+ /* In case of direct I/O, match underlying block size */
+ if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev)
+ return bdev_logical_block_size(backing_bdev);
+ return SECTOR_SIZE;
+}
+
+static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize)
+{
+ struct file *file = lo->lo_backing_file;
+ struct inode *inode = file->f_mapping->host;
+ struct block_device *backing_bdev = NULL;
struct queue_limits lim;
+ if (S_ISBLK(inode->i_mode))
+ backing_bdev = I_BDEV(inode);
+ else if (inode->i_sb->s_bdev)
+ backing_bdev = inode->i_sb->s_bdev;
+
+ if (!bsize)
+ bsize = loop_default_blocksize(lo, backing_bdev);
+
lim = queue_limits_start_update(lo->lo_queue);
lim.logical_block_size = bsize;
lim.physical_block_size = bsize;
lim.io_min = bsize;
- if (update_discard_settings)
- loop_config_discard(lo, &lim);
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL);
+ if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY))
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (backing_bdev && !bdev_nonrot(backing_bdev))
+ lim.features |= BLK_FEAT_ROTATIONAL;
+ loop_config_discard(lo, &lim);
return queue_limits_commit_update(lo->lo_queue, &lim);
}
@@ -1017,12 +1019,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
const struct loop_config *config)
{
struct file *file = fget(config->fd);
- struct inode *inode;
struct address_space *mapping;
int error;
loff_t size;
bool partscan;
- unsigned short bsize;
bool is_loop;
if (!file)
@@ -1055,19 +1055,12 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
goto out_unlock;
mapping = file->f_mapping;
- inode = mapping->host;
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
error = -EINVAL;
goto out_unlock;
}
- if (config->block_size) {
- error = blk_validate_block_size(config->block_size);
- if (error)
- goto out_unlock;
- }
-
error = loop_set_status_from_info(lo, &config->info);
if (error)
goto out_unlock;
@@ -1098,22 +1091,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
- blk_queue_write_cache(lo->lo_queue, true, false);
-
- if (config->block_size)
- bsize = config->block_size;
- else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
- /* In case of direct I/O, match underlying block size */
- bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
- else
- bsize = 512;
-
- error = loop_reconfigure_limits(lo, bsize, true);
- if (WARN_ON_ONCE(error))
+ error = loop_reconfigure_limits(lo, config->block_size);
+ if (error)
goto out_unlock;
- loop_update_rotational(lo);
loop_update_dio(lo);
loop_sysfs_init(lo);
@@ -1154,22 +1135,12 @@ out_putf:
return error;
}
-static void __loop_clr_fd(struct loop_device *lo, bool release)
+static void __loop_clr_fd(struct loop_device *lo)
{
+ struct queue_limits lim;
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
- blk_queue_write_cache(lo->lo_queue, false, false);
-
- /*
- * Freeze the request queue when unbinding on a live file descriptor and
- * thus an open device. When called from ->release we are guaranteed
- * that there is no I/O in progress already.
- */
- if (!release)
- blk_mq_freeze_queue(lo->lo_queue);
-
spin_lock_irq(&lo->lo_lock);
filp = lo->lo_backing_file;
lo->lo_backing_file = NULL;
@@ -1179,7 +1150,14 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
lo->lo_offset = 0;
lo->lo_sizelimit = 0;
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
- loop_reconfigure_limits(lo, 512, false);
+
+ /* reset the block size to the default */
+ lim = queue_limits_start_update(lo->lo_queue);
+ lim.logical_block_size = SECTOR_SIZE;
+ lim.physical_block_size = SECTOR_SIZE;
+ lim.io_min = SECTOR_SIZE;
+ queue_limits_commit_update(lo->lo_queue, &lim);
+
invalidate_disk(lo->lo_disk);
loop_sysfs_exit(lo);
/* let user-space know about this change */
@@ -1187,8 +1165,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- if (!release)
- blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk);
@@ -1203,11 +1179,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
* must be at least one and it can only become zero when the
* current holder is released.
*/
- if (!release)
- mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- if (!release)
- mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
@@ -1256,24 +1228,16 @@ static int loop_clr_fd(struct loop_device *lo)
return -ENXIO;
}
/*
- * If we've explicitly asked to tear down the loop device,
- * and it has an elevated reference count, set it for auto-teardown when
- * the last reference goes away. This stops $!~#$@ udev from
- * preventing teardown because it decided that it needs to run blkid on
- * the loopback device whenever they appear. xfstests is notorious for
- * failing tests because blkid via udev races with a losetup
- * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
- * command to fail with EBUSY.
+ * Mark the device for removing the backing device on last close.
+ * If we are the only opener, also switch the state to rundown here to
+ * prevent new openers from coming in.
*/
- if (disk_openers(lo->lo_disk) > 1) {
- lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- loop_global_unlock(lo, true);
- return 0;
- }
- lo->lo_state = Lo_rundown;
+
+ lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+ if (disk_openers(lo->lo_disk) == 1)
+ lo->lo_state = Lo_rundown;
loop_global_unlock(lo, true);
- __loop_clr_fd(lo, false);
return 0;
}
@@ -1500,10 +1464,6 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (lo->lo_state != Lo_bound)
return -ENXIO;
- err = blk_validate_block_size(arg);
- if (err)
- return err;
-
if (lo->lo_queue->limits.logical_block_size == arg)
return 0;
@@ -1511,7 +1471,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
invalidate_bdev(lo->lo_device);
blk_mq_freeze_queue(lo->lo_queue);
- err = loop_reconfigure_limits(lo, arg, false);
+ err = loop_reconfigure_limits(lo, arg);
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
@@ -1740,25 +1700,43 @@ static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
}
#endif
+static int lo_open(struct gendisk *disk, blk_mode_t mode)
+{
+ struct loop_device *lo = disk->private_data;
+ int err;
+
+ err = mutex_lock_killable(&lo->lo_mutex);
+ if (err)
+ return err;
+
+ if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown)
+ err = -ENXIO;
+ mutex_unlock(&lo->lo_mutex);
+ return err;
+}
+
static void lo_release(struct gendisk *disk)
{
struct loop_device *lo = disk->private_data;
+ bool need_clear = false;
if (disk_openers(disk) > 0)
return;
+ /*
+ * Clear the backing device information if this is the last close of
+ * a device that's been marked for auto clear, or on which LOOP_CLR_FD
+ * has been called.
+ */
mutex_lock(&lo->lo_mutex);
- if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
+ if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
- /*
- * In autoclear mode, stop the loop thread
- * and remove configuration after last close.
- */
- __loop_clr_fd(lo, true);
- return;
- }
+
+ need_clear = (lo->lo_state == Lo_rundown);
mutex_unlock(&lo->lo_mutex);
+
+ if (need_clear)
+ __loop_clr_fd(lo);
}
static void lo_free_disk(struct gendisk *disk)
@@ -1775,6 +1753,7 @@ static void lo_free_disk(struct gendisk *disk)
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
+ .open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
@@ -1853,6 +1832,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH));
+MODULE_DESCRIPTION("Loopback device support");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
@@ -2060,14 +2040,6 @@ static int loop_add(int i)
lo->lo_queue = lo->lo_disk->queue;
/*
- * By default, we do buffer IO, so it doesn't make sense to enable
- * merge because the I/O submitted to backing file is handled page by
- * page. For directio mode, merge does help to dispatch bigger request
- * to underlayer disk. We will enable merge once directio is enabled.
- */
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
-
- /*
* Disable partition scanning by default. The in-kernel partition
* scanning can be requested individually per-device during its
* setup. Userspace can always add and remove partitions from all
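The loop.c changes above replace the immediate teardown in LOOP_CLR_FD with a deferred model: the ioctl now always sets LO_FLAGS_AUTOCLEAR, lo_open() refuses new openers once the state is Lo_rundown, and the backing file is actually dropped from lo_release() at last close. A minimal userspace illustration of the resulting semantics, using only the standard <linux/loop.h> ioctl:

#include <fcntl.h>
#include <linux/loop.h>
#include <sys/ioctl.h>
#include <unistd.h>

/*
 * Detach a loop device under the new semantics: LOOP_CLR_FD marks the
 * device for auto-clear; if other openers (e.g. a racing blkid probe)
 * still hold it, teardown is deferred to their last close instead of
 * racing with them.
 */
static int detach_loop(const char *path)
{
	int fd = open(path, O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, LOOP_CLR_FD) < 0) {
		close(fd);
		return -1;
	}
	/* If we were the last opener, the backing file is gone after this. */
	return close(fd);
}

This is what closes the detach/open race noted in the merge message, where udev's blkid probe could interfere with losetup -d and make it fail with EBUSY.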
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 43a187609ef7..c6ef0546ffc9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3485,8 +3485,6 @@ skip_create_disk:
goto start_service_thread;
/* Set device limits. */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
/* Set the capacity of the device in 512 byte sectors. */
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 27b2187e7a6d..b9fdeff31caf 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -150,8 +150,6 @@ static int __init n64cart_probe(struct platform_device *pdev)
set_capacity(disk, size >> SECTOR_SHIFT);
set_disk_ro(disk, 1);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b87aa80a46dd..41a90150b501 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -342,6 +342,14 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
lim.max_hw_discard_sectors = UINT_MAX;
else
lim.max_hw_discard_sectors = 0;
+ if (!(nbd->config->flags & NBD_FLAG_SEND_FLUSH)) {
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
+ } else if (nbd->config->flags & NBD_FLAG_SEND_FUA) {
+ lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
+ } else {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ lim.features &= ~BLK_FEAT_FUA;
+ }
lim.logical_block_size = blksize;
lim.physical_block_size = blksize;
error = queue_limits_commit_update(nbd->disk->queue, &lim);
@@ -1279,19 +1287,10 @@ static void nbd_bdev_reset(struct nbd_device *nbd)
static void nbd_parse_flags(struct nbd_device *nbd)
{
- struct nbd_config *config = nbd->config;
- if (config->flags & NBD_FLAG_READ_ONLY)
+ if (nbd->config->flags & NBD_FLAG_READ_ONLY)
set_disk_ro(nbd->disk, true);
else
set_disk_ro(nbd->disk, false);
- if (config->flags & NBD_FLAG_SEND_FLUSH) {
- if (config->flags & NBD_FLAG_SEND_FUA)
- blk_queue_write_cache(nbd->disk->queue, true, true);
- else
- blk_queue_write_cache(nbd->disk->queue, true, false);
- }
- else
- blk_queue_write_cache(nbd->disk->queue, false, false);
}
static void send_disconnects(struct nbd_device *nbd)
@@ -1801,7 +1800,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
struct queue_limits lim = {
.max_hw_sectors = 65536,
- .max_user_sectors = 256,
+ .io_opt = 256 << SECTOR_SHIFT,
.max_segments = USHRT_MAX,
.max_segment_size = UINT_MAX,
};
@@ -1861,11 +1860,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
goto out_err_disk;
}
- /*
- * Tell the block layer that we are not a rotational device
- */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
/*
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 75f189e42f88..2f0431e42c49 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -77,7 +77,7 @@ enum {
NULL_IRQ_TIMER = 2,
};
-static bool g_virt_boundary = false;
+static bool g_virt_boundary;
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
@@ -227,7 +227,7 @@ MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (
static bool g_fua = true;
module_param_named(fua, g_fua, bool, 0444);
-MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true");
+MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
@@ -262,6 +262,10 @@ module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444
MODULE_PARM_DESC(zone_append_max_sectors,
"Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+static bool g_zone_full;
+module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
+MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -458,6 +462,7 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(zone_full, bool, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
@@ -610,6 +615,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_readonly,
&nullb_device_attr_zone_offline,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tags,
@@ -700,7 +706,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
"shared_tags,size,submit_queues,use_per_node_hctx,"
"virt_boundary,zoned,zone_capacity,zone_max_active,"
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size,zone_append_max_sectors\n");
+ "zone_size,zone_append_max_sectors,zone_full\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -781,6 +787,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
dev->zone_append_max_sectors = g_zone_append_max_sectors;
+ dev->zone_full = g_zone_full;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
dev->shared_tags = g_shared_tags;
@@ -1824,9 +1831,6 @@ static int null_validate_conf(struct nullb_device *dev)
dev->queue_mode = NULL_Q_MQ;
}
- if (blk_validate_block_size(dev->blocksize))
- return -EINVAL;
-
if (dev->use_per_node_hctx) {
if (dev->submit_queues != nr_online_nodes)
dev->submit_queues = nr_online_nodes;
@@ -1928,6 +1932,13 @@ static int null_add_dev(struct nullb_device *dev)
goto out_cleanup_tags;
}
+ if (dev->cache_size > 0) {
+ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (dev->fua)
+ lim.features |= BLK_FEAT_FUA;
+ }
+
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
if (IS_ERR(nullb->disk)) {
rv = PTR_ERR(nullb->disk);
@@ -1940,13 +1951,7 @@ static int null_add_dev(struct nullb_device *dev)
nullb_setup_bwtimer(nullb);
}
- if (dev->cache_size > 0) {
- set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
- blk_queue_write_cache(nullb->q, true, dev->fua);
- }
-
nullb->q->queuedata = nullb;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
if (rv < 0)
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 3234e6c85eed..a7bb32f73ec3 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -101,6 +101,7 @@ struct nullb_device {
bool memory_backed; /* if data is stored in memory */
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
+ bool zone_full; /* Initialize zones to be full */
bool virt_boundary; /* virtual boundary on/off for the device */
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index f118d304f310..9bc768b2ca56 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -145,7 +145,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone = &dev->zones[i];
null_init_zone_lock(dev, zone);
- zone->start = zone->wp = sector;
+ zone->start = sector;
if (zone->start + dev->zone_size_sects > dev_capacity_sects)
zone->len = dev_capacity_sects - zone->start;
else
@@ -153,12 +153,18 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone->capacity =
min_t(sector_t, zone->len, zone_capacity_sects);
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
- zone->cond = BLK_ZONE_COND_EMPTY;
+ if (dev->zone_full) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->capacity;
+ } else {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ }
sector += dev->zone_size_sects;
}
- lim->zoned = true;
+ lim->features |= BLK_FEAT_ZONED;
lim->chunk_sectors = dev->zone_size_sects;
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
@@ -171,9 +177,6 @@ int null_register_zoned_dev(struct nullb *nullb)
struct request_queue *q = nullb->q;
struct gendisk *disk = nullb->disk;
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- disk->nr_zones = bdev_nr_zones(disk->part0);
-
pr_info("%s: using %s zone append\n",
disk->disk_name,
queue_emulates_zone_append(q) ? "emulated" : "native");
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 8a2ce8070010..7cece5884b9c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2622,6 +2622,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
struct queue_limits lim = {
.max_hw_sectors = PACKET_MAX_SECTORS,
.logical_block_size = CD_FRAMESIZE,
+ .features = BLK_FEAT_ROTATIONAL,
};
int idx;
int ret = -ENOMEM;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index b810ac0a5c4b..ff45ed766469 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -388,9 +388,9 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
.max_segments = -1,
.max_segment_size = dev->bounce_size,
.dma_alignment = dev->blk_size - 1,
+ .features = BLK_FEAT_WRITE_CACHE |
+ BLK_FEAT_ROTATIONAL,
};
-
- struct request_queue *queue;
struct gendisk *gendisk;
if (dev->blk_size < 512) {
@@ -447,10 +447,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
goto fail_free_tag_set;
}
- queue = gendisk->queue;
-
- blk_queue_write_cache(queue, true, false);
-
priv->gendisk = gendisk;
gendisk->major = ps3disk_major;
gendisk->first_minor = devidx * PS3DISK_MINORS;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 26ff5cd2bf0a..008e850555f4 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4949,14 +4949,12 @@ static const struct blk_mq_ops rbd_mq_ops = {
static int rbd_init_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk;
- struct request_queue *q;
unsigned int objset_bytes =
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
struct queue_limits lim = {
.max_hw_sectors = objset_bytes >> SECTOR_SHIFT,
- .max_user_sectors = objset_bytes >> SECTOR_SHIFT,
+ .io_opt = objset_bytes,
.io_min = rbd_dev->opts->alloc_size,
- .io_opt = rbd_dev->opts->alloc_size,
.max_segments = USHRT_MAX,
.max_segment_size = UINT_MAX,
};
@@ -4980,12 +4978,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT;
}
+ if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+
disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev);
if (IS_ERR(disk)) {
err = PTR_ERR(disk);
goto out_tag_set;
}
- q = disk->queue;
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
rbd_dev->dev_id);
@@ -4997,13 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
disk->minors = RBD_MINORS_PER_MAJOR;
disk->fops = &rbd_bd_ops;
disk->private_data = rbd_dev;
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
-
- if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
-
rbd_dev->disk = disk;
return 0;
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 39887556cf95..6ea7c12e3a87 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -475,7 +475,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev)
}
}
-static struct kobj_type rnbd_dev_ktype = {
+static const struct kobj_type rnbd_dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = rnbd_dev_groups,
};
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index b7ffe03c6160..c34695d2eea7 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1352,10 +1352,6 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
if (dev->access_mode == RNBD_ACCESS_RO)
set_disk_ro(dev->gd, true);
- /*
- * Network device does not need rotational
- */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
err = add_disk(dev->gd);
if (err)
put_disk(dev->gd);
@@ -1389,18 +1385,18 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
le32_to_cpu(rsp->max_discard_sectors);
}
+ if (rsp->cache_policy & RNBD_WRITEBACK) {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (rsp->cache_policy & RNBD_FUA)
+ lim.features |= BLK_FEAT_FUA;
+ }
+
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev);
if (IS_ERR(dev->gd))
return PTR_ERR(dev->gd);
dev->queue = dev->gd->queue;
rnbd_init_mq_hw_queues(dev);
- blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
- blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
- blk_queue_write_cache(dev->queue,
- !!(rsp->cache_policy & RNBD_WRITEBACK),
- !!(rsp->cache_policy & RNBD_FUA));
-
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
}
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index cba6ba43c2c2..64780094442c 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -33,7 +33,7 @@ static void rnbd_srv_dev_release(struct kobject *kobj)
kfree(dev);
}
-static struct kobj_type dev_ktype = {
+static const struct kobj_type dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = rnbd_srv_dev_release
};
@@ -184,7 +184,7 @@ static void rnbd_srv_sess_dev_release(struct kobject *kobj)
rnbd_destroy_sess_dev(sess_dev, sess_dev->keep_id);
}
-static struct kobj_type rnbd_srv_sess_dev_ktype = {
+static const struct kobj_type rnbd_srv_sess_dev_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.release = rnbd_srv_sess_dev_release,
};
diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs
new file mode 100644
index 000000000000..b0227cf9ddd3
--- /dev/null
+++ b/drivers/block/rnull.rs
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This is a Rust implementation of the C null block driver.
+//!
+//! Supported features:
+//!
+//! - blk-mq interface
+//! - direct completion
+//! - block size 4k
+//!
+//! The driver is not configurable.
+
+use kernel::{
+ alloc::flags,
+ block::mq::{
+ self,
+ gen_disk::{self, GenDisk},
+ Operations, TagSet,
+ },
+ error::Result,
+ new_mutex, pr_info,
+ prelude::*,
+ sync::{Arc, Mutex},
+ types::ARef,
+};
+
+module! {
+ type: NullBlkModule,
+ name: "rnull_mod",
+ author: "Andreas Hindborg",
+ license: "GPL v2",
+}
+
+struct NullBlkModule {
+ _disk: Pin<Box<Mutex<GenDisk<NullBlkDevice>>>>,
+}
+
+impl kernel::Module for NullBlkModule {
+ fn init(_module: &'static ThisModule) -> Result<Self> {
+ pr_info!("Rust null_blk loaded\n");
+ let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
+
+ let disk = gen_disk::GenDiskBuilder::new()
+ .capacity_sectors(4096 << 11)
+ .logical_block_size(4096)?
+ .physical_block_size(4096)?
+ .rotational(false)
+ .build(format_args!("rnullb{}", 0), tagset)?;
+
+ let disk = Box::pin_init(new_mutex!(disk, "nullb:disk"), flags::GFP_KERNEL)?;
+
+ Ok(Self { _disk: disk })
+ }
+}
+
+struct NullBlkDevice;
+
+#[vtable]
+impl Operations for NullBlkDevice {
+ #[inline(always)]
+ fn queue_rq(rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result {
+ mq::Request::end_ok(rq)
+ .map_err(|_e| kernel::error::code::EIO)
+ // We take no refcounts on the request, so we expect to be able to
+ // end the request. The request reference must be unique at this
+ // point, and so `end_ok` cannot fail.
+ .expect("Fatal error - expected to be able to end request");
+
+ Ok(())
+ }
+
+ fn commit_rqs() {}
+}
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5286cb8e0824..2d38331ee667 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -791,6 +791,7 @@ static int probe_disk(struct vdc_port *port)
.seg_boundary_mask = PAGE_SIZE - 1,
.max_segment_size = PAGE_SIZE,
.max_segments = port->ring_cookies,
+ .features = BLK_FEAT_ROTATIONAL,
};
struct request_queue *q;
struct gendisk *g;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 6731678f3a41..126f151c4f2c 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -787,6 +787,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs)
static int swim_floppy_init(struct swim_priv *swd)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
int err;
int drive;
struct swim __iomem *base = swd->base;
@@ -820,7 +823,7 @@ static int swim_floppy_init(struct swim_priv *swd)
goto exit_put_disks;
swd->unit[drive].disk =
- blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL,
+ blk_mq_alloc_disk(&swd->unit[drive].tag_set, &lim,
&swd->unit[drive]);
if (IS_ERR(swd->unit[drive].disk)) {
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index a04756ac778e..90be1017f7bf 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1189,6 +1189,9 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
static int swim3_attach(struct macio_dev *mdev,
const struct of_device_id *match)
{
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ };
struct floppy_state *fs;
struct gendisk *disk;
int rc;
@@ -1210,7 +1213,7 @@ static int swim3_attach(struct macio_dev *mdev,
if (rc)
goto out_unregister;
- disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs);
+ disk = blk_mq_alloc_disk(&fs->tag_set, &lim, fs);
if (IS_ERR(disk)) {
rc = PTR_ERR(disk);
goto out_free_tag_set;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 4e159948c912..6fb799ec0d88 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -248,8 +248,6 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
-
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
}
@@ -484,16 +482,8 @@ static inline unsigned ublk_pos_to_tag(loff_t pos)
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
{
- struct request_queue *q = ub->ub_disk->queue;
const struct ublk_param_basic *p = &ub->params.basic;
- blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
- p->attrs & UBLK_ATTR_FUA);
- if (p->attrs & UBLK_ATTR_ROTATIONAL)
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-
if (p->attrs & UBLK_ATTR_READ_ONLY)
set_disk_ro(ub->ub_disk, true);
@@ -2204,12 +2194,21 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
return -EOPNOTSUPP;
- lim.zoned = true;
+ lim.features |= BLK_FEAT_ZONED;
lim.max_active_zones = p->max_active_zones;
lim.max_open_zones = p->max_open_zones;
lim.max_zone_append_sectors = p->max_zone_append_sectors;
}
+ if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) {
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ if (ub->params.basic.attrs & UBLK_ATTR_FUA)
+ lim.features |= BLK_FEAT_FUA;
+ }
+
+ if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL)
+ lim.features |= BLK_FEAT_ROTATIONAL;
+
if (wait_for_completion_interruptible(&ub->completion) != 0)
return -EINTR;
@@ -3017,4 +3016,5 @@ module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
+MODULE_DESCRIPTION("Userspace block device");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2351f411fa46..e3147a611151 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
- lim->zoned = true;
+ lim->features |= BLK_FEAT_ZONED;
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_open_zones, &v);
@@ -1089,14 +1089,6 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
return writeback;
}
-static void virtblk_update_cache_mode(struct virtio_device *vdev)
-{
- u8 writeback = virtblk_get_cache_mode(vdev);
- struct virtio_blk *vblk = vdev->priv;
-
- blk_queue_write_cache(vblk->disk->queue, writeback, false);
-}
-
static const char *const virtblk_cache_types[] = {
"write through", "write back"
};
@@ -1108,6 +1100,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
struct gendisk *disk = dev_to_disk(dev);
struct virtio_blk *vblk = disk->private_data;
struct virtio_device *vdev = vblk->vdev;
+ struct queue_limits lim;
int i;
BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
@@ -1116,7 +1109,17 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
return i;
virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
- virtblk_update_cache_mode(vdev);
+
+ lim = queue_limits_start_update(disk->queue);
+ if (virtblk_get_cache_mode(vdev))
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+ else
+ lim.features &= ~BLK_FEAT_WRITE_CACHE;
+ blk_mq_freeze_queue(disk->queue);
+ i = queue_limits_commit_update(disk->queue, &lim);
+ blk_mq_unfreeze_queue(disk->queue);
+ if (i)
+ return i;
return count;
}
@@ -1247,7 +1250,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
struct queue_limits *lim)
{
struct virtio_device *vdev = vblk->vdev;
- u32 v, blk_size, max_size, sg_elems, opt_io_size;
+ u32 v, max_size, sg_elems, opt_io_size;
u32 max_discard_segs = 0;
u32 discard_granularity = 0;
u16 min_io_size;
@@ -1286,46 +1289,36 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
lim->max_segment_size = max_size;
/* Host can optionally specify the block size of the device */
- err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
+ virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
struct virtio_blk_config, blk_size,
- &blk_size);
- if (!err) {
- err = blk_validate_block_size(blk_size);
- if (err) {
- dev_err(&vdev->dev,
- "virtio_blk: invalid block size: 0x%x\n",
- blk_size);
- return err;
- }
-
- lim->logical_block_size = blk_size;
- } else
- blk_size = lim->logical_block_size;
+ &lim->logical_block_size);
/* Use topology information if available */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, physical_block_exp,
&physical_block_exp);
if (!err && physical_block_exp)
- lim->physical_block_size = blk_size * (1 << physical_block_exp);
+ lim->physical_block_size =
+ lim->logical_block_size * (1 << physical_block_exp);
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, alignment_offset,
&alignment_offset);
if (!err && alignment_offset)
- lim->alignment_offset = blk_size * alignment_offset;
+ lim->alignment_offset =
+ lim->logical_block_size * alignment_offset;
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, min_io_size,
&min_io_size);
if (!err && min_io_size)
- lim->io_min = blk_size * min_io_size;
+ lim->io_min = lim->logical_block_size * min_io_size;
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
struct virtio_blk_config, opt_io_size,
&opt_io_size);
if (!err && opt_io_size)
- lim->io_opt = blk_size * opt_io_size;
+ lim->io_opt = lim->logical_block_size * opt_io_size;
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
virtio_cread(vdev, struct virtio_blk_config,
@@ -1419,7 +1412,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
lim->discard_granularity =
discard_granularity << SECTOR_SHIFT;
else
- lim->discard_granularity = blk_size;
+ lim->discard_granularity = lim->logical_block_size;
}
if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
@@ -1448,7 +1441,10 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
static int virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
- struct queue_limits lim = { };
+ struct queue_limits lim = {
+ .features = BLK_FEAT_ROTATIONAL,
+ .logical_block_size = SECTOR_SIZE,
+ };
int err, index;
unsigned int queue_depth;
@@ -1512,6 +1508,9 @@ static int virtblk_probe(struct virtio_device *vdev)
if (err)
goto out_free_tags;
+ if (virtblk_get_cache_mode(vdev))
+ lim.features |= BLK_FEAT_WRITE_CACHE;
+
vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk);
if (IS_ERR(vblk->disk)) {
err = PTR_ERR(vblk->disk);
@@ -1527,9 +1526,6 @@ static int virtblk_probe(struct virtio_device *vdev)
vblk->disk->fops = &virtblk_fops;
vblk->index = index;
- /* configure queue flush support */
- virtblk_update_cache_mode(vdev);
-
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
set_disk_ro(vblk->disk, 1);
@@ -1541,8 +1537,8 @@ static int virtblk_probe(struct virtio_device *vdev)
* All steps that follow use the VQs therefore they need to be
* placed after the virtio_device_ready() call above.
*/
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (lim.features & BLK_FEAT_ZONED)) {
err = blk_revalidate_disk_zones(vblk->disk);
if (err)
goto out_cleanup_disk;
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 944576d582fb..838064593f62 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1563,5 +1563,6 @@ static void __exit xen_blkif_fini(void)
module_exit(xen_blkif_fini);
+MODULE_DESCRIPTION("Virtual block device back-end driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fd7c0ff2139c..59ce113b882a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -788,6 +788,11 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
* A barrier request a superset of FUA, so we can
* implement it the same way. (It's also a FLUSH+FUA,
* since it is guaranteed ordered WRT previous writes.)
+ *
+ * Note that we can end up here with a FUA write and the
+ * flags cleared. This happens when the flag was
+ * run-time disabled after a failing I/O, and we'll
+ * simply submit it as a normal write.
*/
if (info->feature_flush && info->feature_fua)
ring_req->operation =
@@ -795,8 +800,6 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
else if (info->feature_flush)
ring_req->operation =
BLKIF_OP_FLUSH_DISKCACHE;
- else
- ring_req->operation = 0;
}
ring_req->u.rw.nr_segments = num_grant;
if (unlikely(require_extra_req)) {
@@ -887,16 +890,6 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo)
notify_remote_via_irq(rinfo->irq);
}
-static inline bool blkif_request_flush_invalid(struct request *req,
- struct blkfront_info *info)
-{
- return (blk_rq_is_passthrough(req) ||
- ((req_op(req) == REQ_OP_FLUSH) &&
- !info->feature_flush) ||
- ((req->cmd_flags & REQ_FUA) &&
- !info->feature_fua));
-}
-
static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
@@ -908,12 +901,22 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
rinfo = get_rinfo(info, qid);
blk_mq_start_request(qd->rq);
spin_lock_irqsave(&rinfo->ring_lock, flags);
- if (RING_FULL(&rinfo->ring))
- goto out_busy;
- if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
- goto out_err;
+ /*
+ * Check if the backend actually supports flushes.
+ *
+ * While the block layer won't send us flushes if we don't claim to
+ * support them, the Xen protocol allows the backend to revoke support
+ * at any time. That is of course a really bad idea and dangerous, but
+ * has been allowed for 10+ years. In that case we simply clear the
+ * flags, and directly return here for an empty flush and ignore the
+ * FUA flag later on.
+ */
+ if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush))
+ goto complete;
+ if (RING_FULL(&rinfo->ring))
+ goto out_busy;
if (blkif_queue_request(qd->rq, rinfo))
goto out_busy;
@@ -921,14 +924,14 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_OK;
-out_err:
- spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_STS_IOERR;
-
out_busy:
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_DEV_RESOURCE;
+complete:
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ blk_mq_end_request(qd->rq, BLK_STS_OK);
+ return BLK_STS_OK;
}
static void blkif_complete_rq(struct request *rq)
@@ -956,6 +959,12 @@ static void blkif_set_queue_limits(const struct blkfront_info *info,
lim->max_secure_erase_sectors = UINT_MAX;
}
+ if (info->feature_flush) {
+ lim->features |= BLK_FEAT_WRITE_CACHE;
+ if (info->feature_fua)
+ lim->features |= BLK_FEAT_FUA;
+ }
+
/* Hard sector size and max sectors impersonate the equiv. hardware. */
lim->logical_block_size = info->sector_size;
lim->physical_block_size = info->physical_sector_size;
@@ -984,8 +993,6 @@ static const char *flush_info(struct blkfront_info *info)
static void xlvbd_flush(struct blkfront_info *info)
{
- blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
- info->feature_fua ? true : false);
pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info),
"persistent grants:", info->feature_persistent ?
@@ -1063,8 +1070,7 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info)
{
struct queue_limits lim = {};
struct gendisk *gd;
@@ -1139,7 +1145,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = PTR_ERR(gd);
goto out_free_tag_set;
}
- blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue);
strcpy(gd->disk_name, DEV_NAME);
ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
@@ -1159,8 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->rq = gd->queue;
info->gd = gd;
- info->sector_size = sector_size;
- info->physical_sector_size = physical_sector_size;
xlvbd_flush(info);
@@ -1605,8 +1608,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- blk_queue_max_discard_sectors(rq, 0);
- blk_queue_max_secure_erase_sectors(rq, 0);
+ blk_queue_disable_discard(rq);
+ blk_queue_disable_secure_erase(rq);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
@@ -1627,7 +1630,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
- xlvbd_flush(info);
}
fallthrough;
case BLKIF_OP_READ:
@@ -2315,8 +2317,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
static void blkfront_connect(struct blkfront_info *info)
{
unsigned long long sectors;
- unsigned long sector_size;
- unsigned int physical_sector_size;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2355,7 +2355,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
"info", "%u", &info->vdisk_info,
- "sector-size", "%lu", &sector_size,
+ "sector-size", "%lu", &info->sector_size,
NULL);
if (err) {
xenbus_dev_fatal(info->xbdev, err,
@@ -2369,9 +2369,9 @@ static void blkfront_connect(struct blkfront_info *info)
* provide this. Assume physical sector size to be the same as
* sector_size in that case.
*/
- physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
+ info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
"physical-sector-size",
- sector_size);
+ info->sector_size);
blkfront_gather_backend_features(info);
for_each_rinfo(info, rinfo, i) {
err = blkfront_setup_indirect(rinfo);
@@ -2383,8 +2383,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, sector_size,
- physical_sector_size);
+ err = xlvbd_alloc_gendisk(sectors, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
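The xen-blkfront conversion above is representative of the whole series: write-cache semantics move out of blk_queue_write_cache() calls and into the queue_limits that exist before the gendisk does, and a flush revoked by the backend is now completed as a successful no-op instead of being failed with BLK_STS_IOERR. A hedged sketch of the resulting shape, with hypothetical foo_* names standing in for the driver specifics:

static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
				 const struct blk_mq_queue_data *qd)
{
	struct foo_info *info = hctx->queue->queuedata;

	blk_mq_start_request(qd->rq);

	/*
	 * The backend may revoke flush support at any time; complete an
	 * empty flush successfully rather than erroring it out.
	 */
	if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush)) {
		blk_mq_end_request(qd->rq, BLK_STS_OK);
		return BLK_STS_OK;
	}
	/* ... normal ring submission elided ... */
	return BLK_STS_OK;
}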
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 7c5f4e4d9b50..4b7219be1bb8 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -409,4 +409,5 @@ static void __exit z2_exit(void)
module_init(z2_init);
module_exit(z2_exit);
+MODULE_DESCRIPTION("Amiga Zorro II ramdisk driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 3acd7006ad2c..efcb8d9d274c 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2208,6 +2208,8 @@ static int zram_add(void)
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
.max_write_zeroes_sectors = UINT_MAX,
#endif
+ .features = BLK_FEAT_STABLE_WRITES |
+ BLK_FEAT_SYNCHRONOUS,
};
struct zram *zram;
int ret, device_id;
@@ -2245,10 +2247,6 @@ static int zram_add(void)
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
set_capacity(zram->disk, 0);
- /* zram devices sort of resembles non-rotational disks */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
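The zram hunk shows the declarative side of the same conversion: per-queue flags that used to be set one blk_queue_flag_set() call at a time are now spelled out in the queue_limits passed to disk allocation, and non-rotational is the default (which is why the QUEUE_FLAG_NONROT call could simply be dropped). A minimal sketch, with an arbitrary example block size:

	struct queue_limits lim = {
		.logical_block_size	= 4096,
		.features		= BLK_FEAT_STABLE_WRITES |
					  BLK_FEAT_SYNCHRONOUS,
	};
	struct gendisk *disk;

	disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(disk))
		return PTR_ERR(disk);
	/* No follow-up blk_queue_flag_set() calls are needed. */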
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 20c90ebb3a3f..49e4829b7264 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3708,4 +3708,5 @@ static void __exit cdrom_exit(void)
module_init(cdrom_init);
module_exit(cdrom_exit);
+MODULE_DESCRIPTION("Uniform CD-ROM driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index eefdd422ad8e..71cfe7a85913 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -744,6 +744,7 @@ static int probe_gdrom(struct platform_device *devptr)
.max_segments = 1,
/* set a large max size to get most from DMA */
.max_segment_size = 0x40000,
+ .features = BLK_FEAT_ROTATIONAL,
};
int err;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 4d11fc664cb0..b5d6ef430b86 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -897,7 +897,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
sector_t sectors, struct block_device *cached_bdev,
const struct block_device_operations *ops)
{
- struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
SIZE_MAX / sizeof(atomic_t));
struct queue_limits lim = {
@@ -909,6 +908,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
.io_min = block_size,
.logical_block_size = block_size,
.physical_block_size = block_size,
+ .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
};
uint64_t n;
int idx;
@@ -974,13 +974,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->minors = BCACHE_MINORS;
d->disk->fops = ops;
d->disk->private_data = d;
-
- q = d->disk->queue;
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
-
- blk_queue_write_cache(q, true, true);
-
return 0;
out_bioset_exit:
@@ -1423,8 +1416,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
}
if (bdev_io_opt(dc->bdev))
- dc->partial_stripes_expensive =
- q->limits.raid_partial_stripes_expensive;
+ dc->partial_stripes_expensive = !!(q->limits.features &
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
ret = bcache_device_init(&dc->disk, block_size,
bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 16884b585053..2d8dd9283ff4 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -3403,7 +3403,6 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
limits->discard_granularity = origin_limits->discard_granularity;
limits->discard_alignment = origin_limits->discard_alignment;
- limits->discard_misaligned = origin_limits->discard_misaligned;
}
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index ad79b52ffc14..b4384a8b13e3 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -2059,7 +2059,6 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors;
limits->discard_granularity = dest_limits->discard_granularity;
limits->discard_alignment = dest_limits->discard_alignment;
- limits->discard_misaligned = dest_limits->discard_misaligned;
limits->max_discard_segments = dest_limits->max_discard_segments;
}
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 08700bfc3e23..14a44c0f8286 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -206,7 +206,6 @@ struct dm_table {
bool integrity_supported:1;
bool singleton:1;
- unsigned integrity_added:1;
/*
* Indicates the rw permissions for the new logical device. This
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1b7a97cc3779..6c013ceb0e5f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1176,8 +1176,8 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk);
struct mapped_device *md = dm_table_get_md(ti->table);
- /* From now we require underlying device with our integrity profile */
- if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) {
+ /* We require an underlying device with non-PI metadata */
+ if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) {
ti->error = "Integrity profile not supported.";
return -EINVAL;
}
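To make the new dm-crypt test concrete: instead of matching the "DM-DIF-EXT-TAG" profile name, the check inspects the checksum type of the underlying integrity metadata, since dm-crypt wants metadata space for its own authentication tags rather than block-layer protection information. A sketch of the check in isolation, assuming a block device is at hand:

	struct blk_integrity *bi = blk_get_integrity(bdev->bd_disk);

	/* Require integrity metadata that carries no PI checksum. */
	if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE)
		return -EINVAL;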
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 417fddebe367..2a89f8eb4713 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -350,25 +350,6 @@ static struct kmem_cache *journal_io_cache;
#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
#endif
-static void dm_integrity_prepare(struct request *rq)
-{
-}
-
-static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
-{
-}
-
-/*
- * DM Integrity profile, protection is performed layer above (dm-crypt)
- */
-static const struct blk_integrity_profile dm_integrity_profile = {
- .name = "DM-DIF-EXT-TAG",
- .generate_fn = NULL,
- .verify_fn = NULL,
- .prepare_fn = dm_integrity_prepare,
- .complete_fn = dm_integrity_complete,
-};
-
static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);
@@ -3494,6 +3475,17 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
limits->dma_alignment = limits->logical_block_size - 1;
limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
}
+
+ if (!ic->internal_hash) {
+ struct blk_integrity *bi = &limits->integrity;
+
+ memset(bi, 0, sizeof(*bi));
+ bi->tuple_size = ic->tag_size;
+ bi->tag_size = bi->tuple_size;
+ bi->interval_exp =
+ ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
+ }
+
limits->max_integrity_segments = USHRT_MAX;
}
@@ -3650,20 +3642,6 @@ try_smaller_buffer:
return 0;
}
-static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
-{
- struct gendisk *disk = dm_disk(dm_table_get_md(ti->table));
- struct blk_integrity bi;
-
- memset(&bi, 0, sizeof(bi));
- bi.profile = &dm_integrity_profile;
- bi.tuple_size = ic->tag_size;
- bi.tag_size = bi.tuple_size;
- bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
-
- blk_integrity_register(disk, &bi);
-}
-
static void dm_integrity_free_page_list(struct page_list *pl)
{
unsigned int i;
@@ -4649,9 +4627,6 @@ try_smaller_buffer:
}
}
- if (!ic->internal_hash)
- dm_integrity_set(ti, ic);
-
ti->num_flush_bios = 1;
ti->flush_supported = true;
if (ic->discard)
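With the DM-DIF-EXT-TAG profile and the blk_integrity_register() call gone, dm-integrity describes its metadata entirely through the integrity member of queue_limits, filled in from the target's ->io_hints() hook. A reduced sketch of that shape; the tag size of 4 is an illustrative value, not taken from the patch:

static void foo_io_hints(struct dm_target *ti, struct queue_limits *lim)
{
	struct blk_integrity *bi = &lim->integrity;

	memset(bi, 0, sizeof(*bi));
	bi->tuple_size = 4;			/* bytes of tag per interval */
	bi->tag_size = bi->tuple_size;
	bi->interval_exp = SECTOR_SHIFT;	/* one tag per 512-byte sector */
}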
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index abe88d1e6735..052c00c1eb15 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3542,7 +3542,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
recovery = rs->md.recovery;
state = decipher_sync_action(mddev, recovery);
progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
- resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
+ resync_mismatches = mddev->last_sync_action == ACTION_CHECK ?
atomic64_read(&mddev->resync_mismatches) : 0;
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index b2d5246cff21..92d18dcdb3e9 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -425,6 +425,13 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
q->limits.logical_block_size,
q->limits.alignment_offset,
(unsigned long long) start << SECTOR_SHIFT);
+
+ /*
+ * Only stack the integrity profile if the target doesn't have native
+ * integrity support.
+ */
+ if (!dm_target_has_integrity(ti->type))
+ queue_limits_stack_integrity_bdev(limits, bdev);
return 0;
}
@@ -572,6 +579,12 @@ int dm_split_args(int *argc, char ***argvp, char *input)
return 0;
}
+static void dm_set_stacking_limits(struct queue_limits *limits)
+{
+ blk_set_stacking_limits(limits);
+ limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
+}
+
/*
 * Impose necessary and sufficient conditions on a device's table such
* that any incoming bio which respects its logical_block_size can be
@@ -610,7 +623,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t,
for (i = 0; i < t->num_targets; i++) {
ti = dm_table_get_target(t, i);
- blk_set_stacking_limits(&ti_limits);
+ dm_set_stacking_limits(&ti_limits);
/* combine all target devices' limits */
if (ti->type->iterate_devices)
@@ -702,9 +715,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
t->immutable_target_type = ti->type;
}
- if (dm_target_has_integrity(ti->type))
- t->integrity_added = 1;
-
ti->table = t;
ti->begin = start;
ti->len = len;
@@ -1014,14 +1024,13 @@ bool dm_table_request_based(struct dm_table *t)
return __table_type_request_based(dm_table_get_type(t));
}
-static bool dm_table_supports_poll(struct dm_table *t);
-
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
unsigned int per_io_data_size = 0, front_pad, io_front_pad;
unsigned int min_pool_size = 0, pool_size;
struct dm_md_mempools *pools;
+ unsigned int bioset_flags = 0;
if (unlikely(type == DM_TYPE_NONE)) {
DMERR("no table type is set, can't allocate mempools");
@@ -1038,6 +1047,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
goto init_bs;
}
+ if (md->queue->limits.features & BLK_FEAT_POLL)
+ bioset_flags |= BIOSET_PERCPU_CACHE;
+
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
@@ -1050,8 +1062,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
io_front_pad = roundup(per_io_data_size,
__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
- if (bioset_init(&pools->io_bs, pool_size, io_front_pad,
- dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
+ if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
goto out_free_pools;
if (t->integrity_supported &&
bioset_integrity_create(&pools->io_bs, pool_size))
@@ -1119,99 +1130,6 @@ static int dm_table_build_index(struct dm_table *t)
return r;
}
-static bool integrity_profile_exists(struct gendisk *disk)
-{
- return !!blk_get_integrity(disk);
-}
-
-/*
- * Get a disk whose integrity profile reflects the table's profile.
- * Returns NULL if integrity support was inconsistent or unavailable.
- */
-static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t)
-{
- struct list_head *devices = dm_table_get_devices(t);
- struct dm_dev_internal *dd = NULL;
- struct gendisk *prev_disk = NULL, *template_disk = NULL;
-
- for (unsigned int i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = dm_table_get_target(t, i);
-
- if (!dm_target_passes_integrity(ti->type))
- goto no_integrity;
- }
-
- list_for_each_entry(dd, devices, list) {
- template_disk = dd->dm_dev->bdev->bd_disk;
- if (!integrity_profile_exists(template_disk))
- goto no_integrity;
- else if (prev_disk &&
- blk_integrity_compare(prev_disk, template_disk) < 0)
- goto no_integrity;
- prev_disk = template_disk;
- }
-
- return template_disk;
-
-no_integrity:
- if (prev_disk)
- DMWARN("%s: integrity not set: %s and %s profile mismatch",
- dm_device_name(t->md),
- prev_disk->disk_name,
- template_disk->disk_name);
- return NULL;
-}
-
-/*
- * Register the mapped device for blk_integrity support if the
- * underlying devices have an integrity profile. But all devices may
- * not have matching profiles (checking all devices isn't reliable
- * during table load because this table may use other DM device(s) which
- * must be resumed before they will have an initialized integity
- * profile). Consequently, stacked DM devices force a 2 stage integrity
- * profile validation: First pass during table load, final pass during
- * resume.
- */
-static int dm_table_register_integrity(struct dm_table *t)
-{
- struct mapped_device *md = t->md;
- struct gendisk *template_disk = NULL;
-
- /* If target handles integrity itself do not register it here. */
- if (t->integrity_added)
- return 0;
-
- template_disk = dm_table_get_integrity_disk(t);
- if (!template_disk)
- return 0;
-
- if (!integrity_profile_exists(dm_disk(md))) {
- t->integrity_supported = true;
- /*
- * Register integrity profile during table load; we can do
- * this because the final profile must match during resume.
- */
- blk_integrity_register(dm_disk(md),
- blk_get_integrity(template_disk));
- return 0;
- }
-
- /*
- * If DM device already has an initialized integrity
- * profile the new profile should not conflict.
- */
- if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
- DMERR("%s: conflict with existing integrity profile: %s profile mismatch",
- dm_device_name(t->md),
- template_disk->disk_name);
- return 1;
- }
-
- /* Preserve existing integrity profile */
- t->integrity_supported = true;
- return 0;
-}
-
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct dm_crypto_profile {
@@ -1423,12 +1341,6 @@ int dm_table_complete(struct dm_table *t)
return r;
}
- r = dm_table_register_integrity(t);
- if (r) {
- DMERR("could not register integrity profile.");
- return r;
- }
-
r = dm_table_construct_crypto_profile(t);
if (r) {
DMERR("could not construct crypto profile.");
@@ -1493,14 +1405,6 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
return &t->targets[(KEYS_PER_NODE * n) + k];
}
-static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags);
-}
-
/*
* type->iterate_devices() should be called when the sanity check needs to
* iterate and check all underlying data devices. iterate_devices() will
@@ -1548,19 +1452,6 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
-static bool dm_table_supports_poll(struct dm_table *t)
-{
- for (unsigned int i = 0; i < t->num_targets; i++) {
- struct dm_target *ti = dm_table_get_target(t, i);
-
- if (!ti->type->iterate_devices ||
- ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
- return false;
- }
-
- return true;
-}
-
/*
* Check whether a table has no data devices attached using each
* target's iterate_devices method.
@@ -1686,12 +1577,20 @@ int dm_calculate_queue_limits(struct dm_table *t,
unsigned int zone_sectors = 0;
bool zoned = false;
- blk_set_stacking_limits(limits);
+ dm_set_stacking_limits(limits);
+
+ t->integrity_supported = true;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (!dm_target_passes_integrity(ti->type))
+ t->integrity_supported = false;
+ }
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
- blk_set_stacking_limits(&ti_limits);
+ dm_set_stacking_limits(&ti_limits);
if (!ti->type->iterate_devices) {
/* Set I/O hints portion of queue limits */
@@ -1706,12 +1605,12 @@ int dm_calculate_queue_limits(struct dm_table *t,
ti->type->iterate_devices(ti, dm_set_device_limits,
&ti_limits);
- if (!zoned && ti_limits.zoned) {
+ if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) {
/*
* After stacking all limits, validate all devices
* in table support this zoned model and zone sectors.
*/
- zoned = ti_limits.zoned;
+ zoned = (ti_limits.features & BLK_FEAT_ZONED);
zone_sectors = ti_limits.chunk_sectors;
}
@@ -1738,6 +1637,18 @@ combine_limits:
dm_device_name(t->md),
(unsigned long long) ti->begin,
(unsigned long long) ti->len);
+
+ if (t->integrity_supported ||
+ dm_target_has_integrity(ti->type)) {
+ if (!queue_limits_stack_integrity(limits, &ti_limits)) {
+ DMWARN("%s: adding target device (start sect %llu len %llu) "
+ "disabled integrity support due to incompatibility",
+ dm_device_name(t->md),
+ (unsigned long long) ti->begin,
+ (unsigned long long) ti->len);
+ t->integrity_supported = false;
+ }
+ }
}
/*
@@ -1747,12 +1658,12 @@ combine_limits:
* zoned model on host-managed zoned block devices.
* BUT...
*/
- if (limits->zoned) {
+ if (limits->features & BLK_FEAT_ZONED) {
/*
* ...IF the above limits stacking determined a zoned model
* validate that all of the table's devices conform to it.
*/
- zoned = limits->zoned;
+ zoned = limits->features & BLK_FEAT_ZONED;
zone_sectors = limits->chunk_sectors;
}
if (validate_hardware_zoned(t, zoned, zone_sectors))
@@ -1762,63 +1673,15 @@ combine_limits:
}
/*
- * Verify that all devices have an integrity profile that matches the
- * DM device's registered integrity profile. If the profiles don't
- * match then unregister the DM device's integrity profile.
+ * Check if a target requires flush support even if none of the underlying
+ * devices need it (e.g. to persist target-specific metadata).
*/
-static void dm_table_verify_integrity(struct dm_table *t)
-{
- struct gendisk *template_disk = NULL;
-
- if (t->integrity_added)
- return;
-
- if (t->integrity_supported) {
- /*
- * Verify that the original integrity profile
- * matches all the devices in this table.
- */
- template_disk = dm_table_get_integrity_disk(t);
- if (template_disk &&
- blk_integrity_compare(dm_disk(t->md), template_disk) >= 0)
- return;
- }
-
- if (integrity_profile_exists(dm_disk(t->md))) {
- DMWARN("%s: unable to establish an integrity profile",
- dm_device_name(t->md));
- blk_integrity_unregister(dm_disk(t->md));
- }
-}
-
-static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static bool dm_table_supports_flush(struct dm_table *t)
{
- unsigned long flush = (unsigned long) data;
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return (q->queue_flags & flush);
-}
-
-static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
-{
- /*
- * Require at least one underlying device to support flushes.
- * t->devices includes internal dm devices such as mirror logs
- * so we need to use iterate_devices here, which targets
- * supporting flushes must provide.
- */
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
- if (!ti->num_flush_bios)
- continue;
-
- if (ti->flush_supported)
- return true;
-
- if (ti->type->iterate_devices &&
- ti->type->iterate_devices(ti, device_flush_capable, (void *) flush))
+ if (ti->num_flush_bios && ti->flush_supported)
return true;
}
@@ -1839,20 +1702,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
return false;
}
-static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- return !bdev_nonrot(dev->bdev);
-}
-
-static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return !blk_queue_add_random(q);
-}
-
static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
@@ -1877,12 +1726,6 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t)
return true;
}
-static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- return !bdev_nowait(dev->bdev);
-}
-
static bool dm_table_supports_nowait(struct dm_table *t)
{
for (unsigned int i = 0; i < t->num_targets; i++) {
@@ -1890,10 +1733,6 @@ static bool dm_table_supports_nowait(struct dm_table *t)
if (!dm_target_supports_nowait(ti->type))
return false;
-
- if (!ti->type->iterate_devices ||
- ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
- return false;
}
return true;
@@ -1950,29 +1789,25 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
return true;
}
-static int device_requires_stable_pages(struct dm_target *ti,
- struct dm_dev *dev, sector_t start,
- sector_t len, void *data)
-{
- return bdev_stable_writes(dev->bdev);
-}
-
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
- bool wc = false, fua = false;
int r;
- if (dm_table_supports_nowait(t))
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
+ if (!dm_table_supports_nowait(t))
+ limits->features &= ~BLK_FEAT_NOWAIT;
+
+ /*
+	 * The current polling implementation does not support request-based
+ * stacking.
+ */
+ if (!__table_type_bio_based(t->type))
+ limits->features &= ~BLK_FEAT_POLL;
if (!dm_table_supports_discards(t)) {
limits->max_hw_discard_sectors = 0;
limits->discard_granularity = 0;
limits->discard_alignment = 0;
- limits->discard_misaligned = 0;
}
if (!dm_table_supports_write_zeroes(t))
@@ -1981,58 +1816,22 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_secure_erase(t))
limits->max_secure_erase_sectors = 0;
- if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
- wc = true;
- if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
- fua = true;
- }
- blk_queue_write_cache(q, wc, fua);
+ if (dm_table_supports_flush(t))
+ limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
if (dm_table_supports_dax(t, device_not_dax_capable)) {
- blk_queue_flag_set(QUEUE_FLAG_DAX, q);
+ limits->features |= BLK_FEAT_DAX;
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
set_dax_synchronous(t->md->dax_dev);
} else
- blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+ limits->features &= ~BLK_FEAT_DAX;
if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
dax_write_cache(t->md->dax_dev, true);
- /* Ensure that all underlying devices are non-rotational. */
- if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-
- dm_table_verify_integrity(t);
-
- /*
- * Some devices don't use blk_integrity but still want stable pages
- * because they do their own checksumming.
- * If any underlying device requires stable pages, a table must require
- * them as well. Only targets that support iterate_devices are considered:
- * don't want error, zero, etc to require stable pages.
- */
- if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
-
- /*
- * Determine whether or not this queue's I/O timings contribute
- * to the entropy pool, Only request-based targets use this.
- * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
- * have it set.
- */
- if (blk_queue_add_random(q) &&
- dm_table_any_dev_attr(t, device_is_not_random, NULL))
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
-
- /*
- * For a zoned target, setup the zones related queue attributes
- * and resources necessary for zone append emulation if necessary.
- */
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
+ /* For a zoned table, setup the zone related queue attributes. */
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (limits->features & BLK_FEAT_ZONED)) {
r = dm_set_zones_restrictions(t, q, limits);
if (r)
return r;
@@ -2042,22 +1841,18 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (r)
return r;
- dm_update_crypto_profile(q, t);
-
/*
- * Check for request-based device is left to
- * dm_mq_init_request_queue()->blk_mq_init_allocated_queue().
- *
- * For bio-based device, only set QUEUE_FLAG_POLL when all
- * underlying devices supporting polling.
+ * Now that the limits are set, check the zones mapped by the table
+ * and setup the resources for zone append emulation if necessary.
*/
- if (__table_type_bio_based(t->type)) {
- if (dm_table_supports_poll(t))
- blk_queue_flag_set(QUEUE_FLAG_POLL, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ (limits->features & BLK_FEAT_ZONED)) {
+ r = dm_revalidate_zones(t, q);
+ if (r)
+ return r;
}
+ dm_update_crypto_profile(q, t);
return 0;
}
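The dm-table rework inverts the old control flow: rather than probing every underlying device with iterate_devices() callbacks and then toggling queue flags, DM starts each stacking pass from an optimistic feature set and relies on limit stacking (plus a few explicit table-level checks) to clear what cannot be supported. A condensed sketch of the pattern, assuming the named helper checks exist:

static void foo_set_stacking_limits(struct queue_limits *limits)
{
	blk_set_stacking_limits(limits);
	/* Optimistic defaults; stacking clears unsupported bits. */
	limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
}

	/* After all targets' limits have been combined: */
	if (!foo_table_supports_nowait(t))
		limits->features &= ~BLK_FEAT_NOWAIT;
	if (!foo_table_is_bio_based(t))
		limits->features &= ~BLK_FEAT_POLL;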
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 5d66d916730e..c0d41c36e06e 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -13,8 +13,6 @@
#define DM_MSG_PREFIX "zone"
-#define DM_ZONE_INVALID_WP_OFST UINT_MAX
-
/*
* For internal zone reports bypassing the top BIO submission path.
*/
@@ -146,34 +144,27 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
}
/*
- * Count conventional zones of a mapped zoned device. If the device
- * only has conventional zones, do not expose it as zoned.
- */
-static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- unsigned int *nr_conv_zones = data;
-
- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
- (*nr_conv_zones)++;
-
- return 0;
-}
-
-/*
 * Revalidate the zones of a mapped device to initialize the resources necessary
* for zone append emulation. Note that we cannot simply use the block layer
* blk_revalidate_disk_zones() function here as the mapped device is suspended
* (this is called from __bind() context).
*/
-static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
+int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
{
+ struct mapped_device *md = t->md;
struct gendisk *disk = md->disk;
int ret;
+ if (!get_capacity(disk))
+ return 0;
+
/* Revalidate only if something changed. */
- if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
+ if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
+ DMINFO("%s using %s zone append",
+ disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
md->nr_zones = 0;
+ }
if (md->nr_zones)
return 0;
@@ -220,13 +211,129 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
return true;
}
+struct dm_device_zone_count {
+ sector_t start;
+ sector_t len;
+ unsigned int total_nr_seq_zones;
+ unsigned int target_nr_seq_zones;
+};
+
+/*
+ * Count the total number of sequential zones of a target zoned device,
+ * and the number of those zones that are mapped by the target.
+ */
+static int dm_device_count_zones_cb(struct blk_zone *zone,
+ unsigned int idx, void *data)
+{
+ struct dm_device_zone_count *zc = data;
+
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+ zc->total_nr_seq_zones++;
+ if (zone->start >= zc->start &&
+ zone->start < zc->start + zc->len)
+ zc->target_nr_seq_zones++;
+ }
+
+ return 0;
+}
+
+static int dm_device_count_zones(struct dm_dev *dev,
+ struct dm_device_zone_count *zc)
+{
+ int ret;
+
+ ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
+ dm_device_count_zones_cb, zc);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -EIO;
+ return 0;
+}
+
+struct dm_zone_resource_limits {
+ unsigned int mapped_nr_seq_zones;
+ struct queue_limits *lim;
+ bool reliable_limits;
+};
+
+static int device_get_zone_resource_limits(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ struct dm_zone_resource_limits *zlim = data;
+ struct gendisk *disk = dev->bdev->bd_disk;
+ unsigned int max_open_zones, max_active_zones;
+ int ret;
+ struct dm_device_zone_count zc = {
+ .start = start,
+ .len = len,
+ };
+
+ /*
+ * If the target is not the whole device, the device zone resources may
+ * be shared between different targets. Check this by counting the
+ * number of mapped sequential zones: if this number is smaller than the
+ * total number of sequential zones of the target device, then resource
+ * sharing may happen and the zone limits will not be reliable.
+ */
+ ret = dm_device_count_zones(dev, &zc);
+ if (ret) {
+ DMERR("Count %s zones failed %d", disk->disk_name, ret);
+ return ret;
+ }
+
+ /*
+ * If the target does not map any sequential zones, then we do not need
+ * any zone resource limits.
+ */
+ if (!zc.target_nr_seq_zones)
+ return 0;
+
+ /*
+ * If the target does not map all sequential zones, the limits
+ * will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
+ */
+ if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
+ zlim->reliable_limits = false;
+ ti->zone_reset_all_supported = false;
+ }
+
+ /*
+	 * If the target maps fewer sequential zones than the limit values, then
+ * we do not have limits for this target.
+ */
+ max_active_zones = disk->queue->limits.max_active_zones;
+ if (max_active_zones >= zc.target_nr_seq_zones)
+ max_active_zones = 0;
+ zlim->lim->max_active_zones =
+ min_not_zero(max_active_zones, zlim->lim->max_active_zones);
+
+ max_open_zones = disk->queue->limits.max_open_zones;
+ if (max_open_zones >= zc.target_nr_seq_zones)
+ max_open_zones = 0;
+ zlim->lim->max_open_zones =
+ min_not_zero(max_open_zones, zlim->lim->max_open_zones);
+
+ /*
+ * Also count the total number of sequential zones for the mapped
+ * device so that when we are done inspecting all its targets, we are
+ * able to check if the mapped device actually has any sequential zones.
+ */
+ zlim->mapped_nr_seq_zones += zc.target_nr_seq_zones;
+
+ return 0;
+}
+
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *lim)
{
struct mapped_device *md = t->md;
struct gendisk *disk = md->disk;
- unsigned int nr_conv_zones = 0;
- int ret;
+ struct dm_zone_resource_limits zlim = {
+ .reliable_limits = true,
+ .lim = lim,
+ };
/*
* Check if zone append is natively supported, and if not, set the
@@ -240,46 +347,63 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
lim->max_zone_append_sectors = 0;
}
- if (!get_capacity(md->disk))
- return 0;
-
/*
- * Count conventional zones to check that the mapped device will indeed
- * have sequential write required zones.
+ * Determine the max open and max active zone limits for the mapped
+ * device by inspecting the zone resource limits and the zones mapped
+ * by each target.
*/
- md->zone_revalidate_map = t;
- ret = dm_blk_report_zones(disk, 0, UINT_MAX,
- dm_check_zoned_cb, &nr_conv_zones);
- md->zone_revalidate_map = NULL;
- if (ret < 0) {
- DMERR("Check zoned failed %d", ret);
- return ret;
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ /*
+ * Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
+ * device_get_zone_resource_limits() may adjust this if one of
+	 * the devices used by the target does not have all its
+ * sequential write required zones mapped.
+ */
+ ti->zone_reset_all_supported = true;
+
+ if (!ti->type->iterate_devices ||
+ ti->type->iterate_devices(ti,
+ device_get_zone_resource_limits, &zlim)) {
+ DMERR("Could not determine %s zone resource limits",
+ disk->disk_name);
+ return -ENODEV;
+ }
}
/*
- * If we only have conventional zones, expose the mapped device as
- * a regular device.
+ * If we only have conventional zones mapped, expose the mapped device
-	 * a regular device.
+	 * as a regular device.
*/
- if (nr_conv_zones >= ret) {
+ if (!zlim.mapped_nr_seq_zones) {
lim->max_open_zones = 0;
lim->max_active_zones = 0;
- lim->zoned = false;
+ lim->max_zone_append_sectors = 0;
+ lim->zone_write_granularity = 0;
+ lim->chunk_sectors = 0;
+ lim->features &= ~BLK_FEAT_ZONED;
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ md->nr_zones = 0;
disk->nr_zones = 0;
return 0;
}
- if (!md->disk->nr_zones) {
- DMINFO("%s using %s zone append",
- md->disk->disk_name,
- queue_emulates_zone_append(q) ? "emulated" : "native");
- }
-
- ret = dm_revalidate_zones(md, t);
- if (ret < 0)
- return ret;
+ /*
+ * Warn once (when the capacity is not yet set) if the mapped device is
+ * partially using zone resources of the target devices as that leads to
+ * unreliable limits, i.e. if another mapped device uses the same
+ * underlying devices, we cannot enforce zone limits to guarantee that
+ * writing will not lead to errors. Note that we really should return
+	 * an error for such a case, but there is no easy way to find out if
+ * another mapped device uses the same underlying zoned devices.
+ */
+ if (!get_capacity(disk) && !zlim.reliable_limits)
+ DMWARN("%s zone resource limits may be unreliable",
+ disk->disk_name);
- if (!static_key_enabled(&zoned_enabled.key))
+ if (lim->features & BLK_FEAT_ZONED &&
+ !static_key_enabled(&zoned_enabled.key))
static_branch_enable(&zoned_enabled);
return 0;
}
@@ -306,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
return;
}
+
+static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ /*
+ * For an all-zones reset, ignore conventional, empty, read-only
+ * and offline zones.
+ */
+ switch (zone->cond) {
+ case BLK_ZONE_COND_NOT_WP:
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_READONLY:
+ case BLK_ZONE_COND_OFFLINE:
+ return 0;
+ default:
+ set_bit(idx, (unsigned long *)data);
+ return 0;
+ }
+}
+
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset)
+{
+ int ret;
+
+ ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
+ dm_zone_need_reset_cb, need_reset);
+ if (ret != nr_zones) {
+ DMERR("Get %s zone reset bitmap failed\n",
+ md->disk->disk_name);
+ return -EIO;
+ }
+
+ return 0;
+}
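The zone-resource accounting above rests on one report-zones pass per target device, with all the policy in the callback. As a worked example of that callback contract, a hedged sketch that counts the sequential zones falling inside a target's mapped range, mirroring dm_device_count_zones_cb():

struct foo_zone_count {
	sector_t start, len;
	unsigned int nr_seq;
};

static int foo_count_seq_cb(struct blk_zone *zone, unsigned int idx,
			    void *data)
{
	struct foo_zone_count *zc = data;

	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL &&
	    zone->start >= zc->start && zone->start < zc->start + zc->len)
		zc->nr_seq++;
	return 0;	/* a negative return would abort the report */
}

	/* Returns the number of zones reported, or a negative errno. */
	ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
				  foo_count_seq_cb, &zc);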
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 12236e6f46f3..cd0ee144973f 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1009,7 +1009,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
limits->max_sectors = chunk_sectors;
/* We are exposing a drive-managed zoned block device */
- limits->zoned = false;
+ limits->features &= ~BLK_FEAT_ZONED;
}
/*
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 13037d6a6f62..4b1b69e576a5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1188,7 +1188,7 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
return len;
return min_t(sector_t, len,
min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
- blk_chunk_sectors_left(target_offset, max_granularity)));
+ blk_boundary_sectors_left(target_offset, max_granularity)));
}
static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
@@ -1598,20 +1598,19 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
static bool is_abnormal_io(struct bio *bio)
{
- enum req_op op = bio_op(bio);
-
- if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
- switch (op) {
- case REQ_OP_DISCARD:
- case REQ_OP_SECURE_ERASE:
- case REQ_OP_WRITE_ZEROES:
- return true;
- default:
- break;
- }
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ case REQ_OP_FLUSH:
+ return false;
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_ZONE_RESET_ALL:
+ return true;
+ default:
+ return false;
}
-
- return false;
}
static blk_status_t __process_abnormal_io(struct clone_info *ci,
@@ -1776,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
}
+
+static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ struct bio_list blist = BIO_EMPTY_LIST;
+ struct mapped_device *md = ci->io->md;
+ unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors;
+ unsigned long *need_reset;
+ unsigned int i, nr_zones, nr_reset;
+ unsigned int num_bios = 0;
+ blk_status_t sts = BLK_STS_OK;
+ sector_t sector = ti->begin;
+ struct bio *clone;
+ int ret;
+
+ nr_zones = ti->len >> ilog2(zone_sectors);
+ need_reset = bitmap_zalloc(nr_zones, GFP_NOIO);
+ if (!need_reset)
+ return BLK_STS_RESOURCE;
+
+ ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin,
+ nr_zones, need_reset);
+ if (ret) {
+ sts = BLK_STS_IOERR;
+ goto free_bitmap;
+ }
+
+ /* If we have no zone to reset, we are done. */
+ nr_reset = bitmap_weight(need_reset, nr_zones);
+ if (!nr_reset)
+ goto free_bitmap;
+
+ atomic_add(nr_zones, &ci->io->io_count);
+
+ for (i = 0; i < nr_zones; i++) {
+
+ if (!test_bit(i, need_reset)) {
+ sector += zone_sectors;
+ continue;
+ }
+
+ if (bio_list_empty(&blist)) {
+ /* This may take a while, so be nice to others */
+ if (num_bios)
+ cond_resched();
+
+ /*
+ * We may need to reset thousands of zones, so let's
+ * not go crazy with the clone allocation.
+ */
+ alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
+ NULL, GFP_NOIO);
+ }
+
+ /* Get a clone and change it to a regular reset operation. */
+ clone = bio_list_pop(&blist);
+ clone->bi_opf &= ~REQ_OP_MASK;
+ clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
+ clone->bi_iter.bi_sector = sector;
+ clone->bi_iter.bi_size = 0;
+ __map_bio(clone);
+
+ sector += zone_sectors;
+ num_bios++;
+ nr_reset--;
+ }
+
+ WARN_ON_ONCE(!bio_list_empty(&blist));
+ atomic_sub(nr_zones - num_bios, &ci->io->io_count);
+ ci->sector_count = 0;
+
+free_bitmap:
+ bitmap_free(need_reset);
+
+ return sts;
+}
+
+static void __send_zone_reset_all_native(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ unsigned int bios;
+
+ atomic_add(1, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+ atomic_sub(1 - bios, &ci->io->io_count);
+
+ ci->sector_count = 0;
+}
+
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ struct dm_table *t = ci->map;
+ blk_status_t sts = BLK_STS_OK;
+
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (ti->zone_reset_all_supported) {
+ __send_zone_reset_all_native(ci, ti);
+ continue;
+ }
+
+ sts = __send_zone_reset_all_emulated(ci, ti);
+ if (sts != BLK_STS_OK)
+ break;
+ }
+
+ /* Release the reference that alloc_io() took for submission. */
+ atomic_sub(1, &ci->io->io_count);
+
+ return sts;
+}
+
#else
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
struct bio *bio)
@@ -1786,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return false;
}
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ return BLK_STS_NOTSUPP;
+}
#endif
/*
@@ -1799,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md,
blk_status_t error = BLK_STS_OK;
bool is_abnormal, need_split;
- need_split = is_abnormal = is_abnormal_io(bio);
- if (static_branch_unlikely(&zoned_enabled))
- need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
+ is_abnormal = is_abnormal_io(bio);
+ if (static_branch_unlikely(&zoned_enabled)) {
+ /* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
+ need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
+ (is_abnormal || dm_zone_bio_needs_split(md, bio));
+ } else {
+ need_split = is_abnormal;
+ }
if (unlikely(need_split)) {
/*
@@ -1842,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md,
goto out;
}
+ if (static_branch_unlikely(&zoned_enabled) &&
+ (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
+ error = __send_zone_reset_all(&ci);
+ goto out;
+ }
+
error = __split_and_process_bio(&ci);
if (error || !ci.sector_count)
goto out;
@@ -2386,22 +2513,15 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
struct table_device *td;
int r;
- switch (type) {
- case DM_TYPE_REQUEST_BASED:
+ WARN_ON_ONCE(type == DM_TYPE_NONE);
+
+ if (type == DM_TYPE_REQUEST_BASED) {
md->disk->fops = &dm_rq_blk_dops;
r = dm_mq_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based dm mapped device");
return r;
}
- break;
- case DM_TYPE_BIO_BASED:
- case DM_TYPE_DAX_BIO_BASED:
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue);
- break;
- case DM_TYPE_NONE:
- WARN_ON_ONCE(true);
- break;
}
r = dm_calculate_queue_limits(t, &limits);
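The REQ_OP_ZONE_RESET_ALL emulation in dm.c is mostly bookkeeping around one small transformation: each clone allocated for the reset-all is rewritten into a plain single-zone reset aimed at the next zone whose bitmap bit is set. The core of that rewrite, extracted as a sketch:

	/* Turn a clone into a data-less reset of one zone. */
	clone->bi_opf &= ~REQ_OP_MASK;
	clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
	clone->bi_iter.bi_sector = sector;	/* start of the target zone */
	clone->bi_iter.bi_size = 0;		/* zone mgmt ops carry no data */

With, say, zone_sectors = 524288 (256 MiB zones), a 1 TiB target spans nr_zones = ti->len >> ilog2(zone_sectors) = 4096 zones, which is why the loop allocates clones in batches of at most 32 and calls cond_resched() between batches.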
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 53ef8207fe2c..cc466ad5cb1d 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -103,12 +103,16 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
*/
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *lim);
+int dm_revalidate_zones(struct dm_table *t, struct request_queue *q);
void dm_zone_endio(struct dm_io *io, struct bio *clone);
#ifdef CONFIG_BLK_DEV_ZONED
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset);
#else
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 0a2d37eb38ef..08232d8dc815 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -227,6 +227,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
+ unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
+ PAGE_SHIFT;
loff_t sboff, offset = mddev->bitmap_info.offset;
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
unsigned int size = PAGE_SIZE;
@@ -269,11 +271,9 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
if (size == 0)
/* bitmap runs into data */
return -EINVAL;
- } else {
- /* DATA METADATA BITMAP - no problems */
}
- md_super_write(mddev, rdev, sboff + ps, (int) size, page);
+ md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page);
return 0;
}
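The md-bitmap change is a bounds clamp: a bitmap-page write must not run past the pages the bitmap actually owns, so the write size is capped at the remaining bitmap bytes. A worked example with 4 KiB pages:

	/* file_pages = 3, pg_index = 2, PAGE_SHIFT = 12 */
	unsigned int bitmap_limit = (3 - 2) << 12;	/* 4096 bytes left */
	unsigned int size = 4096;			/* one PAGE_SIZE write */

	/*
	 * min(size, bitmap_limit) == 4096: the final page may be written
	 * in full, but nothing beyond it can spill into the data area.
	 */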
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 8e36a0feec09..139fe2019c1d 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1570,7 +1570,7 @@ out:
return err;
}
-static struct md_cluster_operations cluster_ops = {
+static const struct md_cluster_operations cluster_ops = {
.join = join,
.leave = leave,
.slot_number = slot_number,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aff9118ff697..64693913ed18 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -69,13 +69,23 @@
#include "md-bitmap.h"
#include "md-cluster.h"
+static const char *action_name[NR_SYNC_ACTIONS] = {
+ [ACTION_RESYNC] = "resync",
+ [ACTION_RECOVER] = "recover",
+ [ACTION_CHECK] = "check",
+ [ACTION_REPAIR] = "repair",
+ [ACTION_RESHAPE] = "reshape",
+ [ACTION_FROZEN] = "frozen",
+ [ACTION_IDLE] = "idle",
+};
+
/* pers_list is a list of registered personalities protected by pers_lock. */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
static const struct kobj_type md_ktype;
-struct md_cluster_operations *md_cluster_ops;
+const struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;
@@ -479,7 +489,6 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
*/
WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
- del_timer_sync(&mddev->safemode_timer);
/* restrict memory reclaim I/O during raid array is suspend */
mddev->noio_flag = memalloc_noio_save();
@@ -550,13 +559,9 @@ static void md_end_flush(struct bio *bio)
rdev_dec_pending(rdev, mddev);
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pair is percpu_ref_get() from md_flush_request() */
- percpu_ref_put(&mddev->active_io);
-
+ if (atomic_dec_and_test(&mddev->flush_pending))
/* The pre-request flush has finished */
queue_work(md_wq, &mddev->flush_work);
- }
}
static void md_submit_flush_data(struct work_struct *ws);
@@ -587,12 +592,8 @@ static void submit_flushes(struct work_struct *ws)
rcu_read_lock();
}
rcu_read_unlock();
- if (atomic_dec_and_test(&mddev->flush_pending)) {
- /* The pair is percpu_ref_get() from md_flush_request() */
- percpu_ref_put(&mddev->active_io);
-
+ if (atomic_dec_and_test(&mddev->flush_pending))
queue_work(md_wq, &mddev->flush_work);
- }
}
static void md_submit_flush_data(struct work_struct *ws)
@@ -617,8 +618,20 @@ static void md_submit_flush_data(struct work_struct *ws)
bio_endio(bio);
} else {
bio->bi_opf &= ~REQ_PREFLUSH;
- md_handle_request(mddev, bio);
+
+ /*
+		 * make_request() will never return an error here: the only
+		 * failure case is raid5_make_request() when called through
+		 * dm-raid, and since dm always splits data and flush
+		 * operations into two separate bios, a flush submitted by dm
+		 * always has an I/O size of 0 and never reaches make_request().
+ */
+ if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
+ bio_io_error(bio);
}
+
+ /* The pair is percpu_ref_get() from md_flush_request() */
+ percpu_ref_put(&mddev->active_io);
}
/*
@@ -654,24 +667,22 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
WARN_ON(percpu_ref_is_zero(&mddev->active_io));
percpu_ref_get(&mddev->active_io);
mddev->flush_bio = bio;
- bio = NULL;
- }
- spin_unlock_irq(&mddev->lock);
-
- if (!bio) {
+ spin_unlock_irq(&mddev->lock);
INIT_WORK(&mddev->flush_work, submit_flushes);
queue_work(md_wq, &mddev->flush_work);
- } else {
- /* flush was performed for some other bio while we waited. */
- if (bio->bi_iter.bi_size == 0)
- /* an empty barrier - all done */
- bio_endio(bio);
- else {
- bio->bi_opf &= ~REQ_PREFLUSH;
- return false;
- }
+ return true;
}
- return true;
+
+ /* flush was performed for some other bio while we waited. */
+ spin_unlock_irq(&mddev->lock);
+ if (bio->bi_iter.bi_size == 0) {
+ /* pure flush without data - all done */
+ bio_endio(bio);
+ return true;
+ }
+
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ return false;
}
EXPORT_SYMBOL(md_flush_request);
@@ -742,7 +753,6 @@ int mddev_init(struct mddev *mddev)
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
- mutex_init(&mddev->sync_mutex);
mutex_init(&mddev->suspend_mutex);
mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks);
@@ -758,7 +768,7 @@ int mddev_init(struct mddev *mddev)
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
mddev->reshape_backwards = 0;
- mddev->last_sync_action = "none";
+ mddev->last_sync_action = ACTION_IDLE;
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
@@ -2410,36 +2420,10 @@ static LIST_HEAD(pending_raid_disks);
*/
int md_integrity_register(struct mddev *mddev)
{
- struct md_rdev *rdev, *reference = NULL;
-
if (list_empty(&mddev->disks))
return 0; /* nothing to do */
- if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
- return 0; /* shouldn't register, or already is */
- rdev_for_each(rdev, mddev) {
- /* skip spares and non-functional disks */
- if (test_bit(Faulty, &rdev->flags))
- continue;
- if (rdev->raid_disk < 0)
- continue;
- if (!reference) {
- /* Use the first rdev as the reference */
- reference = rdev;
- continue;
- }
- /* does this rdev's profile match the reference profile? */
- if (blk_integrity_compare(reference->bdev->bd_disk,
- rdev->bdev->bd_disk) < 0)
- return -EINVAL;
- }
- if (!reference || !bdev_get_integrity(reference->bdev))
- return 0;
- /*
- * All component devices are integrity capable and have matching
- * profiles, register the common profile for the md device.
- */
- blk_integrity_register(mddev->gendisk,
- bdev_get_integrity(reference->bdev));
+ if (mddev_is_dm(mddev) || !blk_get_integrity(mddev->gendisk))
+ return 0; /* shouldn't register */
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
@@ -2459,32 +2443,6 @@ int md_integrity_register(struct mddev *mddev)
}
EXPORT_SYMBOL(md_integrity_register);
-/*
- * Attempt to add an rdev, but only if it is consistent with the current
- * integrity profile
- */
-int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
-{
- struct blk_integrity *bi_mddev;
-
- if (mddev_is_dm(mddev))
- return 0;
-
- bi_mddev = blk_get_integrity(mddev->gendisk);
-
- if (!bi_mddev) /* nothing to do */
- return 0;
-
- if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
- pr_err("%s: incompatible integrity profile for %pg\n",
- mdname(mddev), rdev->bdev);
- return -ENXIO;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(md_integrity_add_rdev);
-
static bool rdev_read_only(struct md_rdev *rdev)
{
return bdev_read_only(rdev->bdev) ||
@@ -4867,30 +4825,81 @@ out_unlock:
static struct md_sysfs_entry md_metadata =
__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
-static ssize_t
-action_show(struct mddev *mddev, char *page)
+enum sync_action md_sync_action(struct mddev *mddev)
{
- char *type = "idle";
unsigned long recovery = mddev->recovery;
+
+ /*
+	 * frozen has the highest priority, meaning a running sync_thread will
+	 * be stopped immediately, and no new sync_thread can start.
+ */
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
- type = "frozen";
- else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
- (md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
- if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
- type = "reshape";
- else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
- if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
- type = "resync";
- else if (test_bit(MD_RECOVERY_CHECK, &recovery))
- type = "check";
- else
- type = "repair";
- } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
- type = "recover";
- else if (mddev->reshape_position != MaxSector)
- type = "reshape";
+ return ACTION_FROZEN;
+
+ /*
+	 * a read-only array can't register a sync_thread, and it can only
+ * add/remove spares.
+ */
+ if (!md_is_rdwr(mddev))
+ return ACTION_IDLE;
+
+ /*
+ * idle means no sync_thread is running, and no new sync_thread is
+ * requested.
+ */
+ if (!test_bit(MD_RECOVERY_RUNNING, &recovery) &&
+ !test_bit(MD_RECOVERY_NEEDED, &recovery))
+ return ACTION_IDLE;
+
+ if (test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+ mddev->reshape_position != MaxSector)
+ return ACTION_RESHAPE;
+
+ if (test_bit(MD_RECOVERY_RECOVER, &recovery))
+ return ACTION_RECOVER;
+
+ if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
+ /*
+ * MD_RECOVERY_CHECK must be paired with
+ * MD_RECOVERY_REQUESTED.
+ */
+ if (test_bit(MD_RECOVERY_CHECK, &recovery))
+ return ACTION_CHECK;
+ if (test_bit(MD_RECOVERY_REQUESTED, &recovery))
+ return ACTION_REPAIR;
+ return ACTION_RESYNC;
}
- return sprintf(page, "%s\n", type);
+
+ /*
+	 * MD_RECOVERY_NEEDED or MD_RECOVERY_RUNNING is set, but no
+ * sync_action is specified.
+ */
+ return ACTION_IDLE;
+}
+
+enum sync_action md_sync_action_by_name(const char *page)
+{
+ enum sync_action action;
+
+ for (action = 0; action < NR_SYNC_ACTIONS; ++action) {
+ if (cmd_match(page, action_name[action]))
+ return action;
+ }
+
+ return NR_SYNC_ACTIONS;
+}
+
+const char *md_sync_action_name(enum sync_action action)
+{
+ return action_name[action];
+}
+
+static ssize_t
+action_show(struct mddev *mddev, char *page)
+{
+ enum sync_action action = md_sync_action(mddev);
+
+ return sprintf(page, "%s\n", md_sync_action_name(action));
}
/**
@@ -4899,15 +4908,10 @@ action_show(struct mddev *mddev, char *page)
* @locked: if set, reconfig_mutex will still be held after this function
* return; if not set, reconfig_mutex will be released after this
* function return.
- * @check_seq: if set, only wait for curent running sync_thread to stop, noted
- * that new sync_thread can still start.
*/
-static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
+static void stop_sync_thread(struct mddev *mddev, bool locked)
{
- int sync_seq;
-
- if (check_seq)
- sync_seq = atomic_read(&mddev->sync_seq);
+ int sync_seq = atomic_read(&mddev->sync_seq);
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
if (!locked)
@@ -4928,7 +4932,8 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
wait_event(resync_wait,
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- (check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
+ (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery) &&
+ sync_seq != atomic_read(&mddev->sync_seq)));
if (locked)
mddev_lock_nointr(mddev);
@@ -4939,7 +4944,7 @@ void md_idle_sync_thread(struct mddev *mddev)
lockdep_assert_held(&mddev->reconfig_mutex);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev, true, true);
+ stop_sync_thread(mddev, true);
}
EXPORT_SYMBOL_GPL(md_idle_sync_thread);
@@ -4948,7 +4953,7 @@ void md_frozen_sync_thread(struct mddev *mddev)
lockdep_assert_held(&mddev->reconfig_mutex);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev, true, false);
+ stop_sync_thread(mddev, true);
}
EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
@@ -4963,100 +4968,127 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
-static void idle_sync_thread(struct mddev *mddev)
+static int mddev_start_reshape(struct mddev *mddev)
{
- mutex_lock(&mddev->sync_mutex);
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
- if (mddev_lock(mddev)) {
- mutex_unlock(&mddev->sync_mutex);
- return;
- }
-
- stop_sync_thread(mddev, false, true);
- mutex_unlock(&mddev->sync_mutex);
-}
+ int ret;
-static void frozen_sync_thread(struct mddev *mddev)
-{
- mutex_lock(&mddev->sync_mutex);
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (mddev->pers->start_reshape == NULL)
+ return -EINVAL;
- if (mddev_lock(mddev)) {
- mutex_unlock(&mddev->sync_mutex);
- return;
+ if (mddev->reshape_position == MaxSector ||
+ mddev->pers->check_reshape == NULL ||
+ mddev->pers->check_reshape(mddev)) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ ret = mddev->pers->start_reshape(mddev);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * If reshape is still in progress, and md_check_recovery() can
+ * continue to reshape, don't restart reshape because data can
+ * be corrupted for raid456.
+ */
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- stop_sync_thread(mddev, false, false);
- mutex_unlock(&mddev->sync_mutex);
+ sysfs_notify_dirent_safe(mddev->sysfs_degraded);
+ return 0;
}
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
+ int ret;
+ enum sync_action action;
+
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
+retry:
+ if (work_busy(&mddev->sync_work))
+ flush_work(&mddev->sync_work);
- if (cmd_match(page, "idle"))
- idle_sync_thread(mddev);
- else if (cmd_match(page, "frozen"))
- frozen_sync_thread(mddev);
- else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- return -EBUSY;
- else if (cmd_match(page, "resync"))
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- else if (cmd_match(page, "recover")) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- } else if (cmd_match(page, "reshape")) {
- int err;
- if (mddev->pers->start_reshape == NULL)
- return -EINVAL;
- err = mddev_lock(mddev);
- if (!err) {
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- err = -EBUSY;
- } else if (mddev->reshape_position == MaxSector ||
- mddev->pers->check_reshape == NULL ||
- mddev->pers->check_reshape(mddev)) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- err = mddev->pers->start_reshape(mddev);
- } else {
- /*
- * If reshape is still in progress, and
- * md_check_recovery() can continue to reshape,
- * don't restart reshape because data can be
- * corrupted for raid456.
- */
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- }
- mddev_unlock(mddev);
+ ret = mddev_lock(mddev);
+ if (ret)
+ return ret;
+
+ if (work_busy(&mddev->sync_work)) {
+ mddev_unlock(mddev);
+ goto retry;
+ }
+
+ action = md_sync_action_by_name(page);
+
+	/* TODO: mdadm relies on "idle" to start sync_thread. */
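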
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ switch (action) {
+ case ACTION_FROZEN:
+ md_frozen_sync_thread(mddev);
+ ret = len;
+ goto out;
+ case ACTION_IDLE:
+ md_idle_sync_thread(mddev);
+ break;
+ case ACTION_RESHAPE:
+ case ACTION_RECOVER:
+ case ACTION_CHECK:
+ case ACTION_REPAIR:
+ case ACTION_RESYNC:
+ ret = -EBUSY;
+ goto out;
+ default:
+ ret = -EINVAL;
+ goto out;
}
- if (err)
- return err;
- sysfs_notify_dirent_safe(mddev->sysfs_degraded);
} else {
- if (cmd_match(page, "check"))
+ switch (action) {
+ case ACTION_FROZEN:
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ ret = len;
+ goto out;
+ case ACTION_RESHAPE:
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ ret = mddev_start_reshape(mddev);
+ if (ret)
+ goto out;
+ break;
+ case ACTION_RECOVER:
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ break;
+ case ACTION_CHECK:
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
- else if (!cmd_match(page, "repair"))
- return -EINVAL;
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ fallthrough;
+ case ACTION_REPAIR:
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ fallthrough;
+ case ACTION_RESYNC:
+ case ACTION_IDLE:
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
}
+
if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
- flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}
+
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
- return len;
+ ret = len;
+
+out:
+ mddev_unlock(mddev);
+ return ret;
}
static struct md_sysfs_entry md_scan_mode =
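
The rewritten action_store() replaces the idle_sync_thread()/frozen_sync_thread() pair and their sync_mutex with a flush-then-lock-then-recheck loop: sync_work is flushed before taking reconfig_mutex (presumably because the work item itself needs that lock), and the work state is rechecked after locking since new work can be queued in the window. A minimal sketch of the same pattern (helper name hypothetical):

	static int example_lock_without_sync_work(struct mddev *mddev)
	{
		int ret;

		for (;;) {
			if (work_busy(&mddev->sync_work))
				flush_work(&mddev->sync_work);
			ret = mddev_lock(mddev);
			if (ret)
				return ret;
			if (!work_busy(&mddev->sync_work))
				return 0;	/* locked, no pending sync_work */
			mddev_unlock(mddev);
		}
	}
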
@@ -5065,7 +5097,8 @@ __ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
static ssize_t
last_sync_action_show(struct mddev *mddev, char *page)
{
- return sprintf(page, "%s\n", mddev->last_sync_action);
+ return sprintf(page, "%s\n",
+ md_sync_action_name(mddev->last_sync_action));
}
static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
@@ -5755,14 +5788,20 @@ static const struct kobj_type md_ktype = {
int mdp_major = 0;
/* stack the limit for all rdevs into lim */
-void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
+int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
+ unsigned int flags)
{
struct md_rdev *rdev;
rdev_for_each(rdev, mddev) {
queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
+ if ((flags & MDDEV_STACK_INTEGRITY) &&
+ !queue_limits_stack_integrity_bdev(lim, rdev->bdev))
+ return -EINVAL;
}
+
+ return 0;
}
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
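
Callers opt in to integrity stacking through the new flags argument, and on failure they are expected to cancel the pending limits update. A sketch of the pattern each personality now follows, mirroring raid1_set_limits() further down (function name hypothetical):

	static int example_set_limits(struct mddev *mddev)
	{
		struct queue_limits lim;
		int err;

		md_init_stacking_limits(&lim);
		err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
		if (err) {
			queue_limits_cancel_update(mddev->gendisk->queue);
			return err;
		}
		return queue_limits_set(mddev->gendisk->queue, &lim);
	}
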
@@ -5777,6 +5816,14 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
lim = queue_limits_start_update(mddev->gendisk->queue);
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
+
+ if (!queue_limits_stack_integrity_bdev(&lim, rdev->bdev)) {
+ pr_err("%s: incompatible integrity profile for %pg\n",
+ mdname(mddev), rdev->bdev);
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return -ENXIO;
+ }
+
return queue_limits_commit_update(mddev->gendisk->queue, &lim);
}
EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);
@@ -5806,6 +5853,14 @@ static void mddev_delayed_delete(struct work_struct *ws)
kobject_put(&mddev->kobj);
}
+void md_init_stacking_limits(struct queue_limits *lim)
+{
+ blk_set_stacking_limits(lim);
+ lim->features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
+}
+EXPORT_SYMBOL_GPL(md_init_stacking_limits);
+
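
These default feature bits take over from the per-queue flag manipulation deleted elsewhere in this series: write cache and FUA replace the blk_queue_write_cache() call removed from md_alloc() below, and IO_STAT/NOWAIT replace the QUEUE_FLAG block removed from md_run(); limits stacking is now what clears a feature an underlying device lacks. A condensed before/after (illustrative only):

	/* before: per-queue flags set after allocation */
	blk_queue_write_cache(disk->queue, true, true);
	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);

	/* after: seeded once in the limits; stacking clears what the
	 * underlying devices cannot support */
	md_init_stacking_limits(&lim);
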
struct mddev *md_alloc(dev_t dev, char *name)
{
/*
@@ -5823,7 +5878,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
int partitioned;
int shift;
int unit;
- int error ;
+ int error;
/*
* Wait for any previous instance of this device to be completely
@@ -5881,7 +5936,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
disk->fops = &md_fops;
disk->private_data = mddev;
- blk_queue_write_cache(disk->queue, true, true);
disk->events |= DISK_EVENT_MEDIA_CHANGE;
mddev->gendisk = disk;
error = add_disk(disk);
@@ -6185,28 +6239,6 @@ int md_run(struct mddev *mddev)
}
}
- if (!mddev_is_dm(mddev)) {
- struct request_queue *q = mddev->gendisk->queue;
- bool nonrot = true;
-
- rdev_for_each(rdev, mddev) {
- if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
- nonrot = false;
- break;
- }
- }
- if (mddev->degraded)
- nonrot = false;
- if (nonrot)
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- else
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
-
- /* Set the NOWAIT flags if all underlying devices support it */
- if (nowait)
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
- }
if (pers->sync_request) {
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_redundancy_group))
@@ -6437,7 +6469,7 @@ void md_stop_writes(struct mddev *mddev)
{
mddev_lock_nointr(mddev);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev, true, false);
+ stop_sync_thread(mddev, true);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
@@ -6505,7 +6537,7 @@ static int md_set_readonly(struct mddev *mddev)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- stop_sync_thread(mddev, false, false);
+ stop_sync_thread(mddev, false);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
@@ -6551,7 +6583,7 @@ static int do_md_stop(struct mddev *mddev, int mode)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
- stop_sync_thread(mddev, true, false);
+ stop_sync_thread(mddev, true);
if (mddev->sysfs_active ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
@@ -7166,15 +7198,6 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
if (!mddev->thread)
md_update_sb(mddev, 1);
/*
- * If the new disk does not support REQ_NOWAIT,
- * disable on the whole MD.
- */
- if (!bdev_nowait(rdev->bdev)) {
- pr_info("%s: Disabling nowait because %pg does not support nowait\n",
- mdname(mddev), rdev->bdev);
- blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
- }
- /*
* Kick recovery, maybe this spare has to be added to the
* array immediately.
*/
@@ -7742,12 +7765,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
return get_bitmap_file(mddev, argp);
}
- if (cmd == HOT_REMOVE_DISK)
- /* need to ensure recovery thread has run */
- wait_event_interruptible_timeout(mddev->sb_wait,
- !test_bit(MD_RECOVERY_NEEDED,
- &mddev->recovery),
- msecs_to_jiffies(5000));
if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
/* Need to flush page cache, and ensure no-one else opens
* and writes
@@ -8520,7 +8537,7 @@ int unregister_md_personality(struct md_personality *p)
}
EXPORT_SYMBOL(unregister_md_personality);
-int register_md_cluster_operations(struct md_cluster_operations *ops,
+int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module)
{
int ret = 0;
@@ -8641,12 +8658,12 @@ EXPORT_SYMBOL(md_done_sync);
* A return value of 'false' means that the write wasn't recorded
* and cannot proceed as the array is being suspend.
*/
-bool md_write_start(struct mddev *mddev, struct bio *bi)
+void md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
if (bio_data_dir(bi) != WRITE)
- return true;
+ return;
BUG_ON(mddev->ro == MD_RDONLY);
if (mddev->ro == MD_AUTO_READ) {
@@ -8679,15 +8696,9 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
if (!mddev->has_superblocks)
- return true;
+ return;
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
- is_md_suspended(mddev));
- if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
- percpu_ref_put(&mddev->writes_pending);
- return false;
- }
- return true;
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
}
EXPORT_SYMBOL(md_write_start);
@@ -8835,6 +8846,77 @@ void md_allow_write(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_allow_write);
+static sector_t md_sync_max_sectors(struct mddev *mddev,
+ enum sync_action action)
+{
+ switch (action) {
+ case ACTION_RESYNC:
+ case ACTION_CHECK:
+ case ACTION_REPAIR:
+ atomic64_set(&mddev->resync_mismatches, 0);
+ fallthrough;
+ case ACTION_RESHAPE:
+ return mddev->resync_max_sectors;
+ case ACTION_RECOVER:
+ return mddev->dev_sectors;
+ default:
+ return 0;
+ }
+}
+
+static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
+{
+ sector_t start = 0;
+ struct md_rdev *rdev;
+
+ switch (action) {
+ case ACTION_CHECK:
+ case ACTION_REPAIR:
+ return mddev->resync_min;
+ case ACTION_RESYNC:
+ if (!mddev->bitmap)
+ return mddev->recovery_cp;
+ return 0;
+ case ACTION_RESHAPE:
+ /*
+		 * If the original node aborts reshaping then we continue the
+		 * reshaping, so set the position again to avoid restarting the
+		 * reshape from the very beginning
+ */
+ if (mddev_is_clustered(mddev) &&
+ mddev->reshape_position != MaxSector)
+ return mddev->reshape_position;
+ return 0;
+ case ACTION_RECOVER:
+ start = MaxSector;
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Journal, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags) &&
+ !test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < start)
+ start = rdev->recovery_offset;
+ rcu_read_unlock();
+
+ /* If there is a bitmap, we need to make sure all
+ * writes that started before we added a spare
+ * complete before we start doing a recovery.
+ * Otherwise the write might complete and (via
+ * bitmap_endwrite) set a bit in the bitmap after the
+ * recovery has checked that bit and skipped that
+ * region.
+ */
+ if (mddev->bitmap) {
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+ }
+ return start;
+ default:
+ return MaxSector;
+ }
+}
+
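
Pulling the start-position and length selection out of md_do_sync() makes the per-action policy explicit. Summarised from the two helpers above (the mismatch count is additionally reset for resync/check/repair):

	action               md_sync_max_sectors()   md_sync_position()
	ACTION_RESYNC        resync_max_sectors      recovery_cp, or 0 with a bitmap
	ACTION_CHECK/REPAIR  resync_max_sectors      resync_min
	ACTION_RESHAPE       resync_max_sectors      reshape_position when clustered
	                                             and in progress, else 0
	ACTION_RECOVER       dev_sectors             smallest recovery_offset among
	                                             not-In_sync, non-Faulty rdevs
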
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
#define UPDATE_FREQUENCY (5*60*HZ)
@@ -8851,7 +8933,8 @@ void md_do_sync(struct md_thread *thread)
sector_t last_check;
int skipped = 0;
struct md_rdev *rdev;
- char *desc, *action = NULL;
+ enum sync_action action;
+ const char *desc;
struct blk_plug plug;
int ret;
@@ -8882,21 +8965,9 @@ void md_do_sync(struct md_thread *thread)
goto skip;
}
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
- desc = "data-check";
- action = "check";
- } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
- desc = "requested-resync";
- action = "repair";
- } else
- desc = "resync";
- } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- desc = "reshape";
- else
- desc = "recovery";
-
- mddev->last_sync_action = action ?: desc;
+ action = md_sync_action(mddev);
+ desc = md_sync_action_name(action);
+ mddev->last_sync_action = action;
/*
* Before starting a resync we must have set curr_resync to
@@ -8964,56 +9035,8 @@ void md_do_sync(struct md_thread *thread)
spin_unlock(&all_mddevs_lock);
} while (mddev->curr_resync < MD_RESYNC_DELAYED);
- j = 0;
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- /* resync follows the size requested by the personality,
- * which defaults to physical size, but can be virtual size
- */
- max_sectors = mddev->resync_max_sectors;
- atomic64_set(&mddev->resync_mismatches, 0);
- /* we don't use the checkpoint if there's a bitmap */
- if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
- j = mddev->resync_min;
- else if (!mddev->bitmap)
- j = mddev->recovery_cp;
-
- } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
- max_sectors = mddev->resync_max_sectors;
- /*
- * If the original node aborts reshaping then we continue the
- * reshaping, so set j again to avoid restart reshape from the
- * first beginning
- */
- if (mddev_is_clustered(mddev) &&
- mddev->reshape_position != MaxSector)
- j = mddev->reshape_position;
- } else {
- /* recovery follows the physical size of devices */
- max_sectors = mddev->dev_sectors;
- j = MaxSector;
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- !test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < j)
- j = rdev->recovery_offset;
- rcu_read_unlock();
-
- /* If there is a bitmap, we need to make sure all
- * writes that started before we added a spare
- * complete before we start doing a recovery.
- * Otherwise the write might complete and (via
- * bitmap_endwrite) set a bit in the bitmap after the
- * recovery has checked that bit and skipped that
- * region.
- */
- if (mddev->bitmap) {
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- }
- }
+ max_sectors = md_sync_max_sectors(mddev, action);
+ j = md_sync_position(mddev, action);
pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
@@ -9095,7 +9118,8 @@ void md_do_sync(struct md_thread *thread)
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;
- sectors = mddev->pers->sync_request(mddev, j, &skipped);
+ sectors = mddev->pers->sync_request(mddev, j, max_sectors,
+ &skipped);
if (sectors == 0) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
break;
@@ -9185,7 +9209,7 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync_completed = mddev->curr_resync;
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
- mddev->pers->sync_request(mddev, max_sectors, &skipped);
+ mddev->pers->sync_request(mddev, max_sectors, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
mddev->curr_resync > MD_RESYNC_ACTIVE) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ca085ecad504..a0d6827dced9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -34,6 +34,61 @@
*/
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
+/* Status of sync thread. */
+enum sync_action {
+ /*
+	 * Represented by MD_RECOVERY_SYNC, started when:
+	 * 1) after assembly, syncing data from the first rdev to the other
+	 * copies; this must be done before any other sync action and will
+	 * only execute once;
+	 * 2) after resizing the array (note that this is not reshape), syncing
+	 * data for the new range;
+ */
+ ACTION_RESYNC,
+ /*
+	 * Represented by MD_RECOVERY_RECOVER, started when:
+	 * 1) for a new replacement, syncing data based on the replaced rdev or
+	 * available copies from other rdevs;
+	 * 2) for a new member disk while the array is degraded, syncing data
+	 * from other rdevs;
+	 * 3) after reassembly following a power failure, or after re-adding a
+	 * hot-removed rdev, syncing data from the first rdev to the other
+	 * copies based on the bitmap;
+ */
+ ACTION_RECOVER,
+ /*
+	 * Represented by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED |
+	 * MD_RECOVERY_CHECK, started when the user echoes "check" to the sysfs
+	 * api sync_action; used to check whether data copies from different
+	 * rdevs are the same. The number of mismatched sectors is exported to
+	 * the user through the sysfs api mismatch_cnt;
+ */
+ ACTION_CHECK,
+ /*
+	 * Represented by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED, started
+	 * when the user echoes "repair" to the sysfs api sync_action; usually
+	 * paired with ACTION_CHECK, and used to force a data sync once the
+	 * user has found inconsistent data;
+ */
+ ACTION_REPAIR,
+ /*
+	 * Represented by MD_RECOVERY_RESHAPE, started when a new member disk
+	 * is added to the conf; note that this is different from spares or
+	 * replacements;
+ */
+ ACTION_RESHAPE,
+ /*
+	 * Represented by MD_RECOVERY_FROZEN, can be set through the sysfs api
+	 * sync_action or by internal users such as setting the array
+	 * read-only; forbids all of the above actions.
+ */
+ ACTION_FROZEN,
+ /*
+	 * None of the above actions match.
+ */
+ ACTION_IDLE,
+ NR_SYNC_ACTIONS,
+};
+
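
The string forms of these actions are the existing sysfs keywords seen in action_store() above. md_sync_action_name()/md_sync_action_by_name() are declared later in this header but their bodies fall outside this excerpt; a plausible table-driven sketch (array name hypothetical):

	static const char * const action_name[NR_SYNC_ACTIONS] = {
		[ACTION_RESYNC]		= "resync",
		[ACTION_RECOVER]	= "recover",
		[ACTION_CHECK]		= "check",
		[ACTION_REPAIR]		= "repair",
		[ACTION_RESHAPE]	= "reshape",
		[ACTION_FROZEN]		= "frozen",
		[ACTION_IDLE]		= "idle",
	};
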
/*
* The struct embedded in rdev is used to serialize IO.
*/
@@ -371,13 +426,12 @@ struct mddev {
struct md_thread __rcu *thread; /* management thread */
struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */
- /* 'last_sync_action' is initialized to "none". It is set when a
- * sync operation (i.e "data-check", "requested-resync", "resync",
- * "recovery", or "reshape") is started. It holds this value even
+ /*
+ * Set when a sync operation is started. It holds this value even
* when the sync thread is "frozen" (interrupted) or "idle" (stopped
- * or finished). It is overwritten when a new sync operation is begun.
+ * or finished). It is overwritten when a new sync operation is begun.
*/
- char *last_sync_action;
+ enum sync_action last_sync_action;
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
@@ -540,8 +594,6 @@ struct mddev {
*/
struct list_head deleting;
- /* Used to synchronize idle and frozen for action_store() */
- struct mutex sync_mutex;
/* The sequence number for sync thread */
atomic_t sync_seq;
@@ -551,22 +603,46 @@ struct mddev {
};
enum recovery_flags {
+ /* flags for sync thread running status */
+
+ /*
+	 * set when one of the sync actions is set and a new sync thread needs
+	 * to be registered, or when spares are added to or removed from the
+	 * conf.
+ */
+ MD_RECOVERY_NEEDED,
+ /* sync thread is running, or about to be started */
+ MD_RECOVERY_RUNNING,
+ /* sync thread needs to be aborted for some reason */
+ MD_RECOVERY_INTR,
+ /* sync thread is done and is waiting to be unregistered */
+ MD_RECOVERY_DONE,
+ /* running sync thread must abort immediately, and not restart */
+ MD_RECOVERY_FROZEN,
+ /* waiting for pers->start() to finish */
+ MD_RECOVERY_WAIT,
+	/* interrupted because of an io error */
+ MD_RECOVERY_ERROR,
+
+	/* flags that determine the sync action, see details in enum sync_action */
+
+ /* if just this flag is set, action is resync. */
+ MD_RECOVERY_SYNC,
+ /*
+	 * paired with MD_RECOVERY_SYNC; if MD_RECOVERY_CHECK is not set,
+	 * the action is repair, meaning a user-requested resync.
+ */
+ MD_RECOVERY_REQUESTED,
/*
- * If neither SYNC or RESHAPE are set, then it is a recovery.
+ * paired with MD_RECOVERY_SYNC and MD_RECOVERY_REQUESTED, action is
+ * check.
*/
- MD_RECOVERY_RUNNING, /* a thread is running, or about to be started */
- MD_RECOVERY_SYNC, /* actually doing a resync, not a recovery */
- MD_RECOVERY_RECOVER, /* doing recovery, or need to try it. */
- MD_RECOVERY_INTR, /* resync needs to be aborted for some reason */
- MD_RECOVERY_DONE, /* thread is done and is waiting to be reaped */
- MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */
- MD_RECOVERY_REQUESTED, /* user-space has requested a sync (used with SYNC) */
- MD_RECOVERY_CHECK, /* user-space request for check-only, no repair */
- MD_RECOVERY_RESHAPE, /* A reshape is happening */
- MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
- MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
- MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
- MD_RESYNCING_REMOTE, /* remote node is running resync thread */
+ MD_RECOVERY_CHECK,
+ /* recovery, or need to try it */
+ MD_RECOVERY_RECOVER,
+ /* reshape */
+ MD_RECOVERY_RESHAPE,
+ /* remote node is running resync thread */
+ MD_RESYNCING_REMOTE,
};
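
The grouping makes the action encoding checkable: SYNC alone is resync, SYNC|REQUESTED is repair, SYNC|REQUESTED|CHECK is check, RESHAPE is reshape, and RECOVER covers recovery, matching the decode deleted from md_do_sync() above. A hypothetical sketch following these comments (md_sync_action() itself is outside this excerpt and may consult more state):

	static enum sync_action example_decode(unsigned long *recovery)
	{
		if (test_bit(MD_RECOVERY_SYNC, recovery)) {
			if (!test_bit(MD_RECOVERY_REQUESTED, recovery))
				return ACTION_RESYNC;
			return test_bit(MD_RECOVERY_CHECK, recovery) ?
				ACTION_CHECK : ACTION_REPAIR;
		}
		if (test_bit(MD_RECOVERY_RESHAPE, recovery))
			return ACTION_RESHAPE;
		if (test_bit(MD_RECOVERY_RECOVER, recovery))
			return ACTION_RECOVER;
		return ACTION_IDLE;
	}
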
enum md_ro_state {
@@ -653,7 +729,8 @@ struct md_personality
int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*spare_active) (struct mddev *mddev);
- sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
+ sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr,
+ sector_t max_sector, int *skipped);
int (*resize) (struct mddev *mddev, sector_t sectors);
sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
int (*check_reshape) (struct mddev *mddev);
@@ -772,7 +849,7 @@ static inline void safe_put_page(struct page *p)
extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
-extern int register_md_cluster_operations(struct md_cluster_operations *ops,
+extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
@@ -785,7 +862,10 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
-extern bool md_write_start(struct mddev *mddev, struct bio *bi);
+extern enum sync_action md_sync_action(struct mddev *mddev);
+extern enum sync_action md_sync_action_by_name(const char *page);
+extern const char *md_sync_action_name(enum sync_action action);
+extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
@@ -809,11 +889,11 @@ extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
-extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
extern int mddev_init(struct mddev *mddev);
extern void mddev_destroy(struct mddev *mddev);
+void md_init_stacking_limits(struct queue_limits *lim);
struct mddev *md_alloc(dev_t dev, char *name);
void mddev_put(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
@@ -852,7 +932,7 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}
-extern struct md_cluster_operations *md_cluster_ops;
+extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
@@ -908,7 +988,9 @@ void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);
-void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
+#define MDDEV_STACK_INTEGRITY (1u << 0)
+int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
+ unsigned int flags);
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c5d4aeb68404..32d587524778 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -365,30 +365,30 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors;
}
-static void free_conf(struct mddev *mddev, struct r0conf *conf)
-{
- kfree(conf->strip_zone);
- kfree(conf->devlist);
- kfree(conf);
-}
-
static void raid0_free(struct mddev *mddev, void *priv)
{
struct r0conf *conf = priv;
- free_conf(mddev, conf);
+ kfree(conf->strip_zone);
+ kfree(conf->devlist);
+ kfree(conf);
}
static int raid0_set_limits(struct mddev *mddev)
{
struct queue_limits lim;
+ int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_hw_sectors = mddev->chunk_sectors;
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
- mddev_stack_rdev_limits(mddev, &lim);
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err) {
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return err;
+ }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
@@ -415,7 +415,7 @@ static int raid0_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid0_set_limits(mddev);
if (ret)
- goto out_free_conf;
+ return ret;
}
/* calculate array device size */
@@ -427,13 +427,7 @@ static int raid0_run(struct mddev *mddev)
dump_zones(mddev);
- ret = md_integrity_register(mddev);
- if (ret)
- goto out_free_conf;
- return 0;
-out_free_conf:
- free_conf(mddev, conf);
- return ret;
+ return md_integrity_register(mddev);
}
/*
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7b8a71ca66dd..04a0c2ca1732 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1687,8 +1687,7 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
if (bio_data_dir(bio) == READ)
raid1_read_request(mddev, bio, sectors, NULL);
else {
- if (!md_write_start(mddev,bio))
- return false;
+ md_write_start(mddev,bio);
raid1_write_request(mddev, bio, sectors);
}
return true;
@@ -1907,9 +1906,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (mddev->recovery_disabled == conf->recovery_disabled)
return -EBUSY;
- if (md_integrity_add_rdev(rdev, mddev))
- return -ENXIO;
-
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
@@ -2757,12 +2753,12 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
*/
static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
- int *skipped)
+ sector_t max_sector, int *skipped)
{
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
struct bio *bio;
- sector_t max_sector, nr_sectors;
+ sector_t nr_sectors;
int disk = -1;
int i;
int wonly = -1;
@@ -2778,7 +2774,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (init_resync(conf))
return 0;
- max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
/* If we aborted, we need to abort the
* sync on the 'current' bitmap chunk (there will
@@ -3197,14 +3192,18 @@ static struct r1conf *setup_conf(struct mddev *mddev)
static int raid1_set_limits(struct mddev *mddev)
{
struct queue_limits lim;
+ int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
- mddev_stack_rdev_limits(mddev, &lim);
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err) {
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return err;
+ }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
-static void raid1_free(struct mddev *mddev, void *priv);
static int raid1_run(struct mddev *mddev)
{
struct r1conf *conf;
@@ -3238,7 +3237,7 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
if (ret)
- goto abort;
+ return ret;
}
mddev->degraded = 0;
@@ -3252,8 +3251,7 @@ static int raid1_run(struct mddev *mddev)
*/
if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(mddev, &conf->thread);
- ret = -EINVAL;
- goto abort;
+ return -EINVAL;
}
if (conf->raid_disks - mddev->degraded == 1)
@@ -3277,14 +3275,8 @@ static int raid1_run(struct mddev *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
ret = md_integrity_register(mddev);
- if (ret) {
+ if (ret)
md_unregister_thread(mddev, &mddev->thread);
- goto abort;
- }
- return 0;
-
-abort:
- raid1_free(mddev, conf);
return ret;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a4556d2e46bf..2a9c4ee982e0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1836,8 +1836,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
&& md_flush_request(mddev, bio))
return true;
- if (!md_write_start(mddev, bio))
- return false;
+ md_write_start(mddev, bio);
if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
if (!raid10_handle_discard(mddev, bio))
@@ -2083,9 +2082,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
return -EINVAL;
- if (md_integrity_add_rdev(rdev, mddev))
- return -ENXIO;
-
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
@@ -3140,12 +3136,12 @@ static void raid10_set_cluster_sync_high(struct r10conf *conf)
*/
static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
- int *skipped)
+ sector_t max_sector, int *skipped)
{
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
struct bio *biolist = NULL, *bio;
- sector_t max_sector, nr_sectors;
+ sector_t nr_sectors;
int i;
int max_sync;
sector_t sync_blocks;
@@ -3175,10 +3171,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
return 0;
skipped:
- max_sector = mddev->dev_sectors;
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
- test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
conf->cluster_sync_low = 0;
conf->cluster_sync_high = 0;
@@ -3980,12 +3972,17 @@ static int raid10_set_queue_limits(struct mddev *mddev)
{
struct r10conf *conf = mddev->private;
struct queue_limits lim;
+ int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
- mddev_stack_rdev_limits(mddev, &lim);
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err) {
+ queue_limits_cancel_update(mddev->gendisk->queue);
+ return err;
+ }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2bd1ce9b3922..c14cf2410365 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -155,7 +155,7 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
return slot;
}
-static void print_raid5_conf (struct r5conf *conf);
+static void print_raid5_conf(struct r5conf *conf);
static int stripe_operations_active(struct stripe_head *sh)
{
@@ -5899,6 +5899,39 @@ out:
return ret;
}
+enum reshape_loc {
+ LOC_NO_RESHAPE,
+ LOC_AHEAD_OF_RESHAPE,
+ LOC_INSIDE_RESHAPE,
+ LOC_BEHIND_RESHAPE,
+};
+
+static enum reshape_loc get_reshape_loc(struct mddev *mddev,
+ struct r5conf *conf, sector_t logical_sector)
+{
+ sector_t reshape_progress, reshape_safe;
+ /*
+ * Spinlock is needed as reshape_progress may be
+ * 64bit on a 32bit platform, and so it might be
+ * possible to see a half-updated value
+ * Of course reshape_progress could change after
+ * the lock is dropped, so once we get a reference
+ * to the stripe that we think it is, we will have
+ * to check again.
+ */
+ spin_lock_irq(&conf->device_lock);
+ reshape_progress = conf->reshape_progress;
+ reshape_safe = conf->reshape_safe;
+ spin_unlock_irq(&conf->device_lock);
+ if (reshape_progress == MaxSector)
+ return LOC_NO_RESHAPE;
+ if (ahead_of_reshape(mddev, logical_sector, reshape_progress))
+ return LOC_AHEAD_OF_RESHAPE;
+ if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
+ return LOC_INSIDE_RESHAPE;
+ return LOC_BEHIND_RESHAPE;
+}
+
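
Factoring the snapshot into get_reshape_loc() lets the request path and the REQ_NOWAIT check below share one classification. Usage sketch (illustrative):

	switch (get_reshape_loc(mddev, conf, logical_sector)) {
	case LOC_INSIDE_RESHAPE:
		/* between reshape_safe and reshape_progress: defer/retry */
		break;
	case LOC_AHEAD_OF_RESHAPE:
		/* not yet reshaped: address with the previous geometry */
		break;
	default:
		/* no reshape, or already behind it: use the new geometry */
		break;
	}

As the retained comment says, the snapshot can go stale once device_lock is dropped, so callers that take a stripe reference must re-check.
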
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi)
@@ -5913,28 +5946,14 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
seq = read_seqcount_begin(&conf->gen_lock);
if (unlikely(conf->reshape_progress != MaxSector)) {
- /*
- * Spinlock is needed as reshape_progress may be
- * 64bit on a 32bit platform, and so it might be
- * possible to see a half-updated value
- * Of course reshape_progress could change after
- * the lock is dropped, so once we get a reference
- * to the stripe that we think it is, we will have
- * to check again.
- */
- spin_lock_irq(&conf->device_lock);
- if (ahead_of_reshape(mddev, logical_sector,
- conf->reshape_progress)) {
- previous = 1;
- } else {
- if (ahead_of_reshape(mddev, logical_sector,
- conf->reshape_safe)) {
- spin_unlock_irq(&conf->device_lock);
- ret = STRIPE_SCHEDULE_AND_RETRY;
- goto out;
- }
+ enum reshape_loc loc = get_reshape_loc(mddev, conf,
+ logical_sector);
+ if (loc == LOC_INSIDE_RESHAPE) {
+ ret = STRIPE_SCHEDULE_AND_RETRY;
+ goto out;
}
- spin_unlock_irq(&conf->device_lock);
+ if (loc == LOC_AHEAD_OF_RESHAPE)
+ previous = 1;
}
new_sector = raid5_compute_sector(conf, logical_sector, previous,
@@ -6078,8 +6097,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
}
- if (!md_write_start(mddev, bi))
- return false;
+ md_write_start(mddev, bi);
/*
* If array is degraded, better not do chunk aligned read because
* later we might have to read it again in order to reconstruct
@@ -6113,8 +6131,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
- !ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
- ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
+ get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
bio_wouldblock_error(bi);
if (rw == WRITE)
md_write_end(mddev);
@@ -6255,7 +6272,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->reshape_backwards) {
- BUG_ON(writepos < reshape_sectors);
+ if (WARN_ON(writepos < reshape_sectors))
+ return MaxSector;
+
writepos -= reshape_sectors;
readpos += reshape_sectors;
safepos += reshape_sectors;
@@ -6273,14 +6292,18 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
* to set 'stripe_addr' which is where we will write to.
*/
if (mddev->reshape_backwards) {
- BUG_ON(conf->reshape_progress == 0);
+ if (WARN_ON(conf->reshape_progress == 0))
+ return MaxSector;
+
stripe_addr = writepos;
- BUG_ON((mddev->dev_sectors &
- ~((sector_t)reshape_sectors - 1))
- - reshape_sectors - stripe_addr
- != sector_nr);
+ if (WARN_ON((mddev->dev_sectors &
+ ~((sector_t)reshape_sectors - 1)) -
+ reshape_sectors - stripe_addr != sector_nr))
+ return MaxSector;
} else {
- BUG_ON(writepos != sector_nr + reshape_sectors);
+ if (WARN_ON(writepos != sector_nr + reshape_sectors))
+ return MaxSector;
+
stripe_addr = sector_nr;
}
@@ -6458,11 +6481,10 @@ ret:
}
static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
- int *skipped)
+ sector_t max_sector, int *skipped)
{
struct r5conf *conf = mddev->private;
struct stripe_head *sh;
- sector_t max_sector = mddev->dev_sectors;
sector_t sync_blocks;
int still_degraded = 0;
int i;
@@ -7082,12 +7104,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
err = -ENODEV;
else if (new != conf->skip_copy) {
struct request_queue *q = mddev->gendisk->queue;
+ struct queue_limits lim = queue_limits_start_update(q);
conf->skip_copy = new;
if (new)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
+ lim.features |= BLK_FEAT_STABLE_WRITES;
else
- blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
+ lim.features &= ~BLK_FEAT_STABLE_WRITES;
+ err = queue_limits_commit_update(q, &lim);
}
mddev_unlock_and_resume(mddev);
return err ?: len;
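
The skip_copy store is one of many call sites in this series converted from blk_queue_flag_set()/clear() to an atomic queue_limits update. The pattern in isolation (helper name hypothetical):

	static int example_toggle_stable_writes(struct request_queue *q, bool on)
	{
		struct queue_limits lim = queue_limits_start_update(q);

		if (on)
			lim.features |= BLK_FEAT_STABLE_WRITES;
		else
			lim.features &= ~BLK_FEAT_STABLE_WRITES;
		return queue_limits_commit_update(q, &lim);
	}
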
@@ -7562,11 +7586,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (test_bit(Replacement, &rdev->flags)) {
if (disk->replacement)
goto abort;
- RCU_INIT_POINTER(disk->replacement, rdev);
+ disk->replacement = rdev;
} else {
if (disk->rdev)
goto abort;
- RCU_INIT_POINTER(disk->rdev, rdev);
+ disk->rdev = rdev;
}
if (test_bit(In_sync, &rdev->flags)) {
@@ -7702,13 +7726,13 @@ static int raid5_set_limits(struct mddev *mddev)
*/
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
- lim.raid_partial_stripes_expensive = 1;
+ lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
lim.discard_granularity = stripe;
lim.max_write_zeroes_sectors = 0;
- mddev_stack_rdev_limits(mddev, &lim);
+ mddev_stack_rdev_limits(mddev, &lim, 0);
rdev_for_each(rdev, mddev)
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
mddev->gendisk->disk_name);
@@ -8048,7 +8072,7 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "]");
}
-static void print_raid5_conf (struct r5conf *conf)
+static void print_raid5_conf(struct r5conf *conf)
{
struct md_rdev *rdev;
int i;
@@ -8062,15 +8086,13 @@ static void print_raid5_conf (struct r5conf *conf)
conf->raid_disks,
conf->raid_disks - conf->mddev->degraded);
- rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
- rdev = rcu_dereference(conf->disks[i].rdev);
+ rdev = conf->disks[i].rdev;
if (rdev)
pr_debug(" disk %d, o:%d, dev:%pg\n",
i, !test_bit(Faulty, &rdev->flags),
rdev->bdev);
}
- rcu_read_unlock();
}
static int raid5_spare_active(struct mddev *mddev)
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 367509b5b646..2c9963248fcb 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2466,8 +2466,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
struct mmc_blk_data *md;
int devidx, ret;
char cap_str[10];
- bool cache_enabled = false;
- bool fua_enabled = false;
+ unsigned int features = 0;
devidx = ida_alloc_max(&mmc_blk_ida, max_devices - 1, GFP_KERNEL);
if (devidx < 0) {
@@ -2499,7 +2498,24 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
*/
md->read_only = mmc_blk_readonly(card);
- md->disk = mmc_init_queue(&md->queue, card);
+ if (mmc_host_cmd23(card->host)) {
+ if ((mmc_card_mmc(card) &&
+ card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
+ (mmc_card_sd(card) &&
+ card->scr.cmds & SD_SCR_CMD23_SUPPORT))
+ md->flags |= MMC_BLK_CMD23;
+ }
+
+ if (md->flags & MMC_BLK_CMD23 &&
+ ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
+ card->ext_csd.rel_sectors)) {
+ md->flags |= MMC_BLK_REL_WR;
+ features |= (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
+ } else if (mmc_cache_enabled(card->host)) {
+ features |= BLK_FEAT_WRITE_CACHE;
+ }
+
+ md->disk = mmc_init_queue(&md->queue, card, features);
if (IS_ERR(md->disk)) {
ret = PTR_ERR(md->disk);
goto err_kfree;
@@ -2539,26 +2555,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
set_capacity(md->disk, size);
- if (mmc_host_cmd23(card->host)) {
- if ((mmc_card_mmc(card) &&
- card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
- (mmc_card_sd(card) &&
- card->scr.cmds & SD_SCR_CMD23_SUPPORT))
- md->flags |= MMC_BLK_CMD23;
- }
-
- if (md->flags & MMC_BLK_CMD23 &&
- ((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
- card->ext_csd.rel_sectors)) {
- md->flags |= MMC_BLK_REL_WR;
- fua_enabled = true;
- cache_enabled = true;
- }
- if (mmc_cache_enabled(card->host))
- cache_enabled = true;
-
- blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled);
-
string_get_size((u64)size, 512, STRING_UNITS_2,
cap_str, sizeof(cap_str));
pr_info("%s: %s %s %s%s\n",
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 241cdc2b2a2a..d0b3ca8a11f0 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -344,10 +344,12 @@ static const struct blk_mq_ops mmc_mq_ops = {
};
static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
- struct mmc_card *card)
+ struct mmc_card *card, unsigned int features)
{
struct mmc_host *host = card->host;
- struct queue_limits lim = { };
+ struct queue_limits lim = {
+ .features = features,
+ };
struct gendisk *disk;
if (mmc_can_erase(card))
@@ -376,18 +378,16 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
lim.max_segments = host->max_segs;
}
+ if (mmc_host_is_spi(host) && host->use_spi_crc)
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+
disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
if (IS_ERR(disk))
return disk;
mq->queue = disk->queue;
- if (mmc_host_is_spi(host) && host->use_spi_crc)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
blk_queue_rq_timeout(mq->queue, 60 * HZ);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
-
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler);
@@ -413,10 +413,12 @@ static inline bool mmc_merge_capable(struct mmc_host *host)
* mmc_init_queue - initialise a queue structure.
* @mq: mmc queue
* @card: mmc card to attach this queue
+ * @features: block layer features (BLK_FEAT_*)
*
* Initialise a MMC card request queue.
*/
-struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
+struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
+ unsigned int features)
{
struct mmc_host *host = card->host;
struct gendisk *disk;
@@ -460,7 +462,7 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
return ERR_PTR(ret);
- disk = mmc_alloc_disk(mq, card);
+ disk = mmc_alloc_disk(mq, card, features);
if (IS_ERR(disk))
blk_mq_free_tag_set(&mq->tag_set);
return disk;
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index 9ade3bcbb714..1498840a4ea0 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -94,7 +94,8 @@ struct mmc_queue {
struct work_struct complete_work;
};
-struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card);
+struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
+ unsigned int features);
extern void mmc_cleanup_queue(struct mmc_queue *);
extern void mmc_queue_suspend(struct mmc_queue *);
extern void mmc_queue_resume(struct mmc_queue *);
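
Across these three mmc hunks the cache/FUA decision moves ahead of queue allocation and travels as a features mask instead of a blk_queue_write_cache() call after the fact. Condensed flow (the reliable-write predicate is abbreviated here into a hypothetical boolean):

	unsigned int features = 0;

	if (supports_reliable_writes)		/* MMC_BLK_REL_WR */
		features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
	else if (mmc_cache_enabled(card->host))
		features |= BLK_FEAT_WRITE_CACHE;

	md->disk = mmc_init_queue(&md->queue, card, features);

mmc_alloc_disk() then seeds queue_limits.features with this mask before blk_mq_alloc_disk(), adding BLK_FEAT_STABLE_WRITES for SPI hosts with CRC enabled.
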
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 3caa0717d46c..47ead84407cd 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -336,6 +336,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
lim.logical_block_size = tr->blksize;
if (tr->discard)
lim.max_hw_discard_sectors = UINT_MAX;
+ if (tr->flush)
+ lim.features |= BLK_FEAT_WRITE_CACHE;
/* Create gendisk */
gd = blk_mq_alloc_disk(new->tag_set, &lim, new);
@@ -372,13 +374,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
/* Create the request queue */
spin_lock_init(&new->queue_lock);
INIT_LIST_HEAD(&new->rq_list);
-
- if (tr->flush)
- blk_queue_write_cache(new->rq, true, false);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
-
gd->queue = new->rq;
if (new->readonly)
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 1e5aedaf8c7b..e79c06d65bb7 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1501,9 +1501,15 @@ static int btt_blk_init(struct btt *btt)
.logical_block_size = btt->sector_size,
.max_hw_sectors = UINT_MAX,
.max_integrity_segments = 1,
+ .features = BLK_FEAT_SYNCHRONOUS,
};
int rc;
+ if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
+ lim.integrity.tuple_size = btt_meta_size(btt);
+ lim.integrity.tag_size = btt_meta_size(btt);
+ }
+
btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
if (IS_ERR(btt->btt_disk))
return PTR_ERR(btt->btt_disk);
@@ -1513,17 +1519,6 @@ static int btt_blk_init(struct btt *btt)
btt->btt_disk->fops = &btt_fops;
btt->btt_disk->private_data = btt;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue);
-
- if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
- struct blk_integrity bi = {
- .tuple_size = btt_meta_size(btt),
- .tag_size = btt_meta_size(btt),
- };
- blk_integrity_register(btt->btt_disk, &bi);
- }
-
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
if (rc)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 598fe2e89bda..1dd74c969d5a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -455,6 +455,8 @@ static int pmem_attach_disk(struct device *dev,
.logical_block_size = pmem_sector_size(ndns),
.physical_block_size = PAGE_SIZE,
.max_hw_sectors = UINT_MAX,
+ .features = BLK_FEAT_WRITE_CACHE |
+ BLK_FEAT_SYNCHRONOUS,
};
int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
@@ -463,7 +465,6 @@ static int pmem_attach_disk(struct device *dev,
struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem;
- struct request_queue *q;
struct gendisk *disk;
void *addr;
int rc;
@@ -495,6 +496,10 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0;
}
+ if (fua)
+ lim.features |= BLK_FEAT_FUA;
+ if (is_nd_pfn(dev))
+ lim.features |= BLK_FEAT_DAX;
if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) {
@@ -505,7 +510,6 @@ static int pmem_attach_disk(struct device *dev,
disk = blk_alloc_disk(&lim, nid);
if (IS_ERR(disk))
return PTR_ERR(disk);
- q = disk->queue;
pmem->disk = disk;
pmem->pgmap.owner = pmem;
@@ -543,12 +547,6 @@ static int pmem_attach_disk(struct device *dev,
}
pmem->virt_addr = addr;
- blk_queue_write_cache(q, true, fua);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q);
- if (pmem->pfn_flags & PFN_MAP)
- blk_queue_flag_set(QUEUE_FLAG_DAX, q);
-
disk->fops = &pmem_fops;
disk->private_data = pmem;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index b309c8be720f..a3caef75aa0a 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config NVME_CORE
tristate
- select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
config BLK_DEV_NVME
tristate "NVM Express block device"
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 0cfa39361d3b..b1387dc459a3 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1388,7 +1388,7 @@ static void devm_apple_nvme_mempool_destroy(void *data)
mempool_destroy(data);
}
-static int apple_nvme_probe(struct platform_device *pdev)
+static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct apple_nvme *anv;
@@ -1396,7 +1396,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
anv = devm_kzalloc(dev, sizeof(*anv), GFP_KERNEL);
if (!anv)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
anv->dev = get_device(dev);
anv->adminq.is_adminq = true;
@@ -1516,10 +1516,30 @@ static int apple_nvme_probe(struct platform_device *pdev)
goto put_dev;
}
+ return anv;
+put_dev:
+ put_device(anv->dev);
+ return ERR_PTR(ret);
+}
+
+static int apple_nvme_probe(struct platform_device *pdev)
+{
+ struct apple_nvme *anv;
+ int ret;
+
+ anv = apple_nvme_alloc(pdev);
+ if (IS_ERR(anv))
+ return PTR_ERR(anv);
+
+ ret = nvme_add_ctrl(&anv->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
if (IS_ERR(anv->ctrl.admin_q)) {
ret = -ENOMEM;
- goto put_dev;
+ anv->ctrl.admin_q = NULL;
+ goto out_uninit_ctrl;
}
nvme_reset_ctrl(&anv->ctrl);
@@ -1527,8 +1547,10 @@ static int apple_nvme_probe(struct platform_device *pdev)
return 0;
-put_dev:
- put_device(anv->dev);
+out_uninit_ctrl:
+ nvme_uninit_ctrl(&anv->ctrl);
+out_put_ctrl:
+ nvme_put_ctrl(&anv->ctrl);
return ret;
}
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index 6f2ebb5fcdb0..2b9e6cfaf2a8 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -173,7 +173,7 @@ static const char * const nvme_statuses[] = {
const char *nvme_get_error_status_str(u16 status)
{
- status &= 0x7ff;
+ status &= NVME_SCT_SC_MASK;
if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status])
return nvme_statuses[status];
return "Unknown";
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 782090ce0bc1..19917253ba7b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -110,7 +110,7 @@ struct workqueue_struct *nvme_delete_wq;
EXPORT_SYMBOL_GPL(nvme_delete_wq);
static LIST_HEAD(nvme_subsystems);
-static DEFINE_MUTEX(nvme_subsystems_lock);
+DEFINE_MUTEX(nvme_subsystems_lock);
static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_ctrl_base_chr_devt;
@@ -261,7 +261,7 @@ void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
static blk_status_t nvme_error_status(u16 status)
{
- switch (status & 0x7ff) {
+ switch (status & NVME_SCT_SC_MASK) {
case NVME_SC_SUCCESS:
return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
@@ -307,7 +307,7 @@ static void nvme_retry_req(struct request *req)
u16 crd;
/* The mask and shift result must be <= 3 */
- crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
+ crd = (nvme_req(req)->status & NVME_STATUS_CRD) >> 11;
if (crd)
delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100;
@@ -329,10 +329,10 @@ static void nvme_log_error(struct request *req)
nvme_sect_to_lba(ns->head, blk_rq_pos(req)),
blk_rq_bytes(req) >> ns->head->lba_shift,
nvme_get_error_status_str(nr->status),
- nr->status >> 8 & 7, /* Status Code Type */
- nr->status & 0xff, /* Status Code */
- nr->status & NVME_SC_MORE ? "MORE " : "",
- nr->status & NVME_SC_DNR ? "DNR " : "");
+ NVME_SCT(nr->status), /* Status Code Type */
+ nr->status & NVME_SC_MASK, /* Status Code */
+ nr->status & NVME_STATUS_MORE ? "MORE " : "",
+ nr->status & NVME_STATUS_DNR ? "DNR " : "");
return;
}
@@ -341,10 +341,10 @@ static void nvme_log_error(struct request *req)
nvme_get_admin_opcode_str(nr->cmd->common.opcode),
nr->cmd->common.opcode,
nvme_get_error_status_str(nr->status),
- nr->status >> 8 & 7, /* Status Code Type */
- nr->status & 0xff, /* Status Code */
- nr->status & NVME_SC_MORE ? "MORE " : "",
- nr->status & NVME_SC_DNR ? "DNR " : "");
+ NVME_SCT(nr->status), /* Status Code Type */
+ nr->status & NVME_SC_MASK, /* Status Code */
+ nr->status & NVME_STATUS_MORE ? "MORE " : "",
+ nr->status & NVME_STATUS_DNR ? "DNR " : "");
}
static void nvme_log_err_passthru(struct request *req)
@@ -359,10 +359,10 @@ static void nvme_log_err_passthru(struct request *req)
nvme_get_admin_opcode_str(nr->cmd->common.opcode),
nr->cmd->common.opcode,
nvme_get_error_status_str(nr->status),
- nr->status >> 8 & 7, /* Status Code Type */
- nr->status & 0xff, /* Status Code */
- nr->status & NVME_SC_MORE ? "MORE " : "",
- nr->status & NVME_SC_DNR ? "DNR " : "",
+ NVME_SCT(nr->status), /* Status Code Type */
+ nr->status & NVME_SC_MASK, /* Status Code */
+ nr->status & NVME_STATUS_MORE ? "MORE " : "",
+ nr->status & NVME_STATUS_DNR ? "DNR " : "",
nr->cmd->common.cdw10,
nr->cmd->common.cdw11,
nr->cmd->common.cdw12,
@@ -384,11 +384,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
return COMPLETE;
if (blk_noretry_request(req) ||
- (nvme_req(req)->status & NVME_SC_DNR) ||
+ (nvme_req(req)->status & NVME_STATUS_DNR) ||
nvme_req(req)->retries >= nvme_max_retries)
return COMPLETE;
- if ((nvme_req(req)->status & 0x7ff) == NVME_SC_AUTH_REQUIRED)
+ if ((nvme_req(req)->status & NVME_SCT_SC_MASK) == NVME_SC_AUTH_REQUIRED)
return AUTHENTICATE;
if (req->cmd_flags & REQ_NVME_MPATH) {
@@ -927,6 +927,36 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
return BLK_STS_OK;
}
+/*
+ * NVMe does not support a dedicated command to issue an atomic write. A write
+ * which does not adhere to the device atomic limits will silently be executed
+ * non-atomically. The request issuer should ensure that the write is within
+ * the queue atomic write limits, but validate it here in case it is not.
+ */
+static bool nvme_valid_atomic_write(struct request *req)
+{
+ struct request_queue *q = req->q;
+ u32 boundary_bytes = queue_atomic_write_boundary_bytes(q);
+
+ if (blk_rq_bytes(req) > queue_atomic_write_unit_max_bytes(q))
+ return false;
+
+ if (boundary_bytes) {
+ u64 mask = boundary_bytes - 1, imask = ~mask;
+ u64 start = blk_rq_pos(req) << SECTOR_SHIFT;
+ u64 end = start + blk_rq_bytes(req) - 1;
+
+		/* If larger than the boundary size, the write must cross a boundary */
+ if (blk_rq_bytes(req) > boundary_bytes)
+ return false;
+
+ if ((start & imask) != (end & imask))
+ return false;
+ }
+
+ return true;
+}
+
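
A worked pass through the boundary check, with hypothetical numbers: boundary_bytes = 64 KiB gives mask = 0xffff and imask = ~0xffff. A 16 KiB write at byte offset 0xfc00 spans [0xfc00, 0x13bff]; start & imask is 0x0 while end & imask is 0x10000, so the write straddles a 64 KiB boundary and is rejected. The same write at offset 0x10000 keeps both values at 0x10000 and passes. The size test before it rejects any request larger than the boundary outright, since such a request must cross one.
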
static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd,
enum nvme_opcode op)
@@ -942,6 +972,9 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
if (req->cmd_flags & REQ_RAHEAD)
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+ if (req->cmd_flags & REQ_ATOMIC && !nvme_valid_atomic_write(req))
+ return BLK_STS_INVAL;
+
cmnd->rw.opcode = op;
cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
@@ -1224,7 +1257,7 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
/*
* Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
- *
+ *
* The host should send Keep Alive commands at half of the Keep Alive Timeout
* accounting for transport roundtrip times [..].
*/
@@ -1724,11 +1757,12 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
+static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head,
+ struct queue_limits *lim)
{
- struct blk_integrity integrity = { };
+ struct blk_integrity *bi = &lim->integrity;
- blk_integrity_unregister(disk);
+ memset(bi, 0, sizeof(*bi));
if (!head->ms)
return true;
@@ -1745,17 +1779,16 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
case NVME_NS_DPS_PI_TYPE3:
switch (head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
- integrity.profile = &t10_pi_type3_crc;
- integrity.tag_size = sizeof(u16) + sizeof(u32);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC;
+ bi->tag_size = sizeof(u16) + sizeof(u32);
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
case NVME_NVM_NS_64B_GUARD:
- integrity.profile = &ext_pi_type3_crc64;
- integrity.tag_size = sizeof(u16) + 6;
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC64;
+ bi->tag_size = sizeof(u16) + 6;
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
break;
default:
- integrity.profile = NULL;
break;
}
break;
@@ -1763,28 +1796,27 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
case NVME_NS_DPS_PI_TYPE2:
switch (head->guard_type) {
case NVME_NVM_NS_16B_GUARD:
- integrity.profile = &t10_pi_type1_crc;
- integrity.tag_size = sizeof(u16);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC;
+ bi->tag_size = sizeof(u16);
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE |
+ BLK_INTEGRITY_REF_TAG;
break;
case NVME_NVM_NS_64B_GUARD:
- integrity.profile = &ext_pi_type1_crc64;
- integrity.tag_size = sizeof(u16);
- integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC64;
+ bi->tag_size = sizeof(u16);
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE |
+ BLK_INTEGRITY_REF_TAG;
break;
default:
- integrity.profile = NULL;
break;
}
break;
default:
- integrity.profile = NULL;
break;
}
- integrity.tuple_size = head->ms;
- integrity.pi_offset = head->pi_offset;
- blk_integrity_register(disk, &integrity);
+ bi->tuple_size = head->ms;
+ bi->pi_offset = head->pi_offset;
return true;
}
@@ -1922,6 +1954,23 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
}
}
+
+static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
+ struct nvme_id_ns *id, struct queue_limits *lim,
+ u32 bs, u32 atomic_bs)
+{
+ unsigned int boundary = 0;
+
+ if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
+ if (le16_to_cpu(id->nabspf))
+ boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+ }
+ lim->atomic_write_hw_max = atomic_bs;
+ lim->atomic_write_hw_boundary = boundary;
+ lim->atomic_write_hw_unit_min = bs;
+ lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
+}
+
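
Worked numbers (hypothetical identify data): with a 4 KiB logical block, nawupf = 7 and nabspf = 15 yield atomic_bs = (1 + 7) * 4096 = 32768 and boundary = (15 + 1) * 4096 = 65536, so the queue advertises atomic_write_hw_max = 32768, unit_min = 4096, and unit_max = rounddown_pow_of_two(32768) = 32768, on a 64 KiB boundary.
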
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
{
return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1;
@@ -1968,13 +2017,16 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+
+ nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
}
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
phys_bs = bs * (1 + le16_to_cpu(id->npwg));
/* NOWS = Namespace Optimal Write Size */
- io_opt = bs * (1 + le16_to_cpu(id->nows));
+ if (id->nows)
+ io_opt = bs * (1 + le16_to_cpu(id->nows));
}
/*
@@ -2058,7 +2110,6 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
- bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
struct nvme_zone_info zi = {};
@@ -2107,11 +2158,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
nvme_update_zone_info(ns, &lim, &zi);
- ret = queue_limits_commit_update(ns->disk->queue, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
+
+ if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
+ else
+ lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
@@ -2119,9 +2170,15 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
* I/O to namespaces with metadata except when the namespace supports
* PI, as it can strip/insert in that case.
*/
- if (!nvme_init_integrity(ns->disk, ns->head))
+ if (!nvme_init_integrity(ns->disk, ns->head, &lim))
capacity = 0;
+ ret = queue_limits_commit_update(ns->disk->queue, &lim);
+ if (ret) {
+ blk_mq_unfreeze_queue(ns->disk->queue);
+ goto out;
+ }
+
set_capacity_and_notify(ns->disk, capacity);
/*
@@ -2133,7 +2190,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
ns->head->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
- blk_queue_write_cache(ns->disk->queue, vwc, vwc);
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2193,14 +2249,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
- if (unsupported)
- ns->head->disk->flags |= GENHD_FL_HIDDEN;
- else
- nvme_init_integrity(ns->head->disk, ns->head);
- set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
- set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
- nvme_mpath_revalidate_paths(ns);
-
/*
* queue_limits mixes values that are the hardware limitations
* for bio splitting with what is the device configuration.
@@ -2223,13 +2271,48 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
lim.io_opt = ns_lim->io_opt;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
+ if (unsupported)
+ ns->head->disk->flags |= GENHD_FL_HIDDEN;
+ else
+ nvme_init_integrity(ns->head->disk, ns->head, &lim);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
+
+ set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
+ set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
+ nvme_mpath_revalidate_paths(ns);
+
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
return ret;
}
+int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
+ enum blk_unique_id type)
+{
+ struct nvme_ns_ids *ids = &ns->head->ids;
+
+ if (type != BLK_UID_EUI64)
+ return -EINVAL;
+
+ if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) {
+ memcpy(id, &ids->nguid, sizeof(ids->nguid));
+ return sizeof(ids->nguid);
+ }
+ if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) {
+ memcpy(id, &ids->eui64, sizeof(ids->eui64));
+ return sizeof(ids->eui64);
+ }
+
+ return -EINVAL;
+}
+
+static int nvme_get_unique_id(struct gendisk *disk, u8 id[16],
+ enum blk_unique_id type)
+{
+ return nvme_ns_get_unique_id(disk->private_data, id, type);
+}
+
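
Note the precedence here: only BLK_UID_EUI64 requests are honoured, and a
non-zero NGUID (16 bytes) is preferred over the EUI-64 itself (8 bytes), with
the returned length telling the caller which identifier it got. A minimal
sketch of an in-kernel consumer; the helper name is hypothetical:

    #include <linux/blkdev.h>

    /* Returns the identifier length (8 or 16) or a negative errno. */
    static int example_read_uid(struct gendisk *disk, u8 uid[16])
    {
        if (!disk->fops->get_unique_id)
            return -EOPNOTSUPP;
        return disk->fops->get_unique_id(disk, uid, BLK_UID_EUI64);
    }
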
#ifdef CONFIG_BLK_SED_OPAL
static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send)
@@ -2285,6 +2368,7 @@ const struct block_device_operations nvme_bdev_ops = {
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
+ .get_unique_id = nvme_get_unique_id,
.report_zones = nvme_report_zones,
.pr_ops = &nvme_pr_ops,
};
@@ -3721,6 +3805,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
{
+ struct queue_limits lim = { };
struct nvme_ns *ns;
struct gendisk *disk;
int node = ctrl->numa_node;
@@ -3729,7 +3814,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
if (!ns)
return;
- disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns);
+ if (ctrl->opts && ctrl->opts->data_digest)
+ lim.features |= BLK_FEAT_STABLE_WRITES;
+ if (ctrl->ops->supports_pci_p2pdma &&
+ ctrl->ops->supports_pci_p2pdma(ctrl))
+ lim.features |= BLK_FEAT_PCI_P2PDMA;
+
+ disk = blk_mq_alloc_disk(ctrl->tagset, &lim, ns);
if (IS_ERR(disk))
goto out_free_ns;
disk->fops = &nvme_bdev_ops;
@@ -3737,15 +3828,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
ns->disk = disk;
ns->queue = disk->queue;
-
- if (ctrl->opts && ctrl->opts->data_digest)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
- if (ctrl->ops->supports_pci_p2pdma &&
- ctrl->ops->supports_pci_p2pdma(ctrl))
- blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
-
ns->ctrl = ctrl;
kref_init(&ns->kref);
@@ -3887,7 +3969,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_info *info)
{
- int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ int ret = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
if (!nvme_ns_ids_equal(&ns->head->ids, &info->ids)) {
dev_err(ns->ctrl->device,
@@ -3903,7 +3985,7 @@ out:
*
* TODO: we should probably schedule a delayed retry here.
*/
- if (ret > 0 && (ret & NVME_SC_DNR))
+ if (ret > 0 && (ret & NVME_STATUS_DNR))
nvme_ns_remove(ns);
}
@@ -4095,7 +4177,7 @@ static void nvme_scan_work(struct work_struct *work)
* they report) but don't actually support it.
*/
ret = nvme_scan_ns_list(ctrl);
- if (ret > 0 && ret & NVME_SC_DNR)
+ if (ret > 0 && ret & NVME_STATUS_DNR)
nvme_scan_ns_sequential(ctrl);
}
mutex_unlock(&ctrl->scan_lock);
@@ -4489,13 +4571,15 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
return ret;
if (ctrl->ops->flags & NVME_F_FABRICS) {
- ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL);
+ struct queue_limits lim = {
+ .features = BLK_FEAT_SKIP_TAGSET_QUIESCE,
+ };
+
+ ctrl->connect_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->connect_q)) {
ret = PTR_ERR(ctrl->connect_q);
goto out_free_tag_set;
}
- blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE,
- ctrl->connect_q);
}
ctrl->tagset = set;
@@ -4613,6 +4697,9 @@ static void nvme_free_ctrl(struct device *dev)
* Initialize the NVMe controller structures. This needs to be called
* during earliest initialization so that we have the initialized
* structures around during probing.
+ *
+ * On success, the caller must use nvme_put_ctrl() to release this
+ * reference when done, which also invokes the ops->free_ctrl() callback.
*/
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks)
@@ -4661,6 +4748,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
goto out;
ctrl->instance = ret;
+ ret = nvme_auth_init_ctrl(ctrl);
+ if (ret)
+ goto out_release_instance;
+
+ nvme_mpath_init_ctrl(ctrl);
+
device_initialize(&ctrl->ctrl_device);
ctrl->device = &ctrl->ctrl_device;
ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
@@ -4673,16 +4766,36 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->device->groups = nvme_dev_attr_groups;
ctrl->device->release = nvme_free_ctrl;
dev_set_drvdata(ctrl->device, ctrl);
+
+ return ret;
+
+out_release_instance:
+ ida_free(&nvme_instance_ida, ctrl->instance);
+out:
+ if (ctrl->discard_page)
+ __free_page(ctrl->discard_page);
+ cleanup_srcu_struct(&ctrl->srcu);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+
+/*
+ * On success, returns with an elevated controller reference and caller must
+ * use nvme_uninit_ctrl() to properly free resources associated with the ctrl.
+ */
+int nvme_add_ctrl(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
if (ret)
- goto out_release_instance;
+ return ret;
- nvme_get_ctrl(ctrl);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
- ctrl->cdev.owner = ops->module;
+ ctrl->cdev.owner = ctrl->ops->module;
ret = cdev_device_add(&ctrl->cdev, ctrl->device);
if (ret)
- goto out_free_name;
+ return ret;
/*
* Initialize latency tolerance controls. The sysfs files won't
@@ -4693,28 +4806,11 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
- nvme_mpath_init_ctrl(ctrl);
- ret = nvme_auth_init_ctrl(ctrl);
- if (ret)
- goto out_free_cdev;
+ nvme_get_ctrl(ctrl);
return 0;
-out_free_cdev:
- nvme_fault_inject_fini(&ctrl->fault_inject);
- dev_pm_qos_hide_latency_tolerance(ctrl->device);
- cdev_device_del(&ctrl->cdev, ctrl->device);
-out_free_name:
- nvme_put_ctrl(ctrl);
- kfree_const(ctrl->device->kobj.name);
-out_release_instance:
- ida_free(&nvme_instance_ida, ctrl->instance);
-out:
- if (ctrl->discard_page)
- __free_page(ctrl->discard_page);
- cleanup_srcu_struct(&ctrl->srcu);
- return ret;
}
-EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+EXPORT_SYMBOL_GPL(nvme_add_ctrl);
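
With allocation split from registration, every transport follows the same
lifecycle, visible in the fc, rdma, tcp and pci hunks below: nvme_init_ctrl()
while allocating, nvme_add_ctrl() once the controller can be exposed, and
nvme_uninit_ctrl() plus nvme_put_ctrl() on teardown. A condensed sketch of
the probe-side flow; my_alloc_ctrl() stands in for the transport-specific
allocation that calls nvme_init_ctrl() internally:

    #include <linux/device.h>
    #include <linux/err.h>
    /* struct nvme_ctrl and my_alloc_ctrl() come from the (hypothetical)
     * transport's private headers. */

    static int example_probe(struct device *dev)
    {
        struct nvme_ctrl *ctrl = my_alloc_ctrl(dev);
        int ret;

        if (IS_ERR(ctrl))
            return PTR_ERR(ctrl);

        ret = nvme_add_ctrl(ctrl);      /* cdev + elevated reference */
        if (ret)
            goto out_put;

        /* ...transport setup; call nvme_uninit_ctrl() on failure... */
        return 0;

    out_put:
        nvme_put_ctrl(ctrl);            /* drops the init reference */
        return ret;
    }
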
/* let I/O to all namespaces fail in preparation for surprise removal */
void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index ceb9c0ed3120..44e342a46f39 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -187,7 +187,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
if (unlikely(ret != 0))
dev_err(ctrl->device,
"Property Get error: %d, offset %#x\n",
- ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
+ ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off);
return ret;
}
@@ -233,7 +233,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
if (unlikely(ret != 0))
dev_err(ctrl->device,
"Property Get error: %d, offset %#x\n",
- ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
+ ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off);
return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_read64);
@@ -275,11 +275,26 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
if (unlikely(ret))
dev_err(ctrl->device,
"Property Set error: %d, offset %#x\n",
- ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
+ ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off);
return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_write32);
+int nvmf_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ if (!nvme_wait_reset(ctrl))
+ return -EBUSY;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, NVME_SUBSYS_RESET);
+ if (ret)
+ return ret;
+
+ return nvme_try_sched_reset(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmf_subsystem_reset);
+
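
This gives fabrics controllers the same NSSR write the PCI transport gains in
nvme_pci_subsystem_reset() further down; NVME_SUBSYS_RESET is the magic value
0x4E564D65 ("NVMe" in ASCII) that the open-coded constant in nvme.h used to
spell out. From userspace the reset is driven through the controller character
device, roughly what `nvme subsystem-reset /dev/nvme0` does:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/nvme_ioctl.h>

    int main(void)
    {
        int fd = open("/dev/nvme0", O_RDONLY);

        if (fd < 0 || ioctl(fd, NVME_IOCTL_SUBSYS_RESET) < 0) {
            perror("subsystem reset");
            return 1;
        }
        close(fd);
        return 0;
    }
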
/**
* nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
* connect() errors.
@@ -295,7 +310,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
int errval, int offset, struct nvme_command *cmd,
struct nvmf_connect_data *data)
{
- int err_sctype = errval & ~NVME_SC_DNR;
+ int err_sctype = errval & ~NVME_STATUS_DNR;
if (errval < 0) {
dev_err(ctrl->device,
@@ -573,7 +588,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
*/
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status)
{
- if (status > 0 && (status & NVME_SC_DNR))
+ if (status > 0 && (status & NVME_STATUS_DNR))
return false;
if (status == -EKEYREJECTED)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 602135910ae9..21d75dc4a3a0 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -217,6 +217,7 @@ static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
+int nvmf_subsystem_reset(struct nvme_ctrl *ctrl);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
index 1ba10a5c656d..1d1b6441a339 100644
--- a/drivers/nvme/host/fault_inject.c
+++ b/drivers/nvme/host/fault_inject.c
@@ -75,7 +75,7 @@ void nvme_should_fail(struct request *req)
/* inject status code and DNR bit */
status = fault_inject->status;
if (fault_inject->dont_retry)
- status |= NVME_SC_DNR;
+ status |= NVME_STATUS_DNR;
nvme_req(req)->status = status;
}
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f0b081332749..b81af7919e94 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3132,7 +3132,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
ctrl->ctrl.icdoff);
- ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out_stop_keep_alive;
}
@@ -3140,7 +3140,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (!nvme_ctrl_sgl_supported(&ctrl->ctrl)) {
dev_err(ctrl->ctrl.device,
"Mandatory sgls are not supported!\n");
- ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ ret = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out_stop_keep_alive;
}
@@ -3325,7 +3325,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
} else {
if (portptr->port_state == FC_OBJSTATE_ONLINE) {
- if (status > 0 && (status & NVME_SC_DNR))
+ if (status > 0 && (status & NVME_STATUS_DNR))
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: reconnect failure\n",
ctrl->cnum);
@@ -3382,6 +3382,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_fc_free_ctrl,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
@@ -3444,12 +3445,11 @@ nvme_fc_existing_controller(struct nvme_fc_rport *rport,
return found;
}
-static struct nvme_ctrl *
-nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+static struct nvme_fc_ctrl *
+nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
{
struct nvme_fc_ctrl *ctrl;
- unsigned long flags;
int ret, idx, ctrl_loss_tmo;
if (!(rport->remoteport.port_role &
@@ -3538,7 +3538,35 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
if (lport->dev)
ctrl->ctrl.numa_node = dev_to_node(lport->dev);
- /* at this point, teardown path changes to ref counting on nvme ctrl */
+ return ctrl;
+
+out_free_queues:
+ kfree(ctrl->queues);
+out_free_ida:
+ put_device(ctrl->dev);
+ ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
+out_free_ctrl:
+ kfree(ctrl);
+out_fail:
+ /* exit via here doesn't follow ctrl ref points */
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *
+nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
+ struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
+{
+ struct nvme_fc_ctrl *ctrl;
+ unsigned long flags;
+ int ret;
+
+ ctrl = nvme_fc_alloc_ctrl(dev, opts, lport, rport);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
&nvme_fc_admin_mq_ops,
@@ -3584,6 +3612,7 @@ fail_ctrl:
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
/* Remove core ctrl ref. */
nvme_put_ctrl(&ctrl->ctrl);
@@ -3597,20 +3626,8 @@ fail_ctrl:
nvme_fc_rport_get(rport);
return ERR_PTR(-EIO);
-
-out_free_queues:
- kfree(ctrl->queues);
-out_free_ida:
- put_device(ctrl->dev);
- ida_free(&nvme_fc_ctrl_cnt, ctrl->cnum);
-out_free_ctrl:
- kfree(ctrl);
-out_fail:
- /* exit via here doesn't follow ctlr ref points */
- return ERR_PTR(ret);
}
-
struct nvmet_fc_traddr {
u64 nn;
u64 pn;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d8b6b4648eaf..91d9eb3c22ef 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -17,6 +17,7 @@ MODULE_PARM_DESC(multipath,
static const char *nvme_iopolicy_names[] = {
[NVME_IOPOLICY_NUMA] = "numa",
[NVME_IOPOLICY_RR] = "round-robin",
+ [NVME_IOPOLICY_QD] = "queue-depth",
};
static int iopolicy = NVME_IOPOLICY_NUMA;
@@ -29,6 +30,8 @@ static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
iopolicy = NVME_IOPOLICY_NUMA;
else if (!strncmp(val, "round-robin", 11))
iopolicy = NVME_IOPOLICY_RR;
+ else if (!strncmp(val, "queue-depth", 11))
+ iopolicy = NVME_IOPOLICY_QD;
else
return -EINVAL;
@@ -43,7 +46,7 @@ static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
&iopolicy, 0644);
MODULE_PARM_DESC(iopolicy,
- "Default multipath I/O policy; 'numa' (default) or 'round-robin'");
+ "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
@@ -83,7 +86,7 @@ void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
- u16 status = nvme_req(req)->status & 0x7ff;
+ u16 status = nvme_req(req)->status & NVME_SCT_SC_MASK;
unsigned long flags;
struct bio *bio;
@@ -128,6 +131,11 @@ void nvme_mpath_start_request(struct request *rq)
struct nvme_ns *ns = rq->q->queuedata;
struct gendisk *disk = ns->head->disk;
+ if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) {
+ atomic_inc(&ns->ctrl->nr_active);
+ nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE;
+ }
+
if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
return;
@@ -141,6 +149,9 @@ void nvme_mpath_end_request(struct request *rq)
{
struct nvme_ns *ns = rq->q->queuedata;
+ if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)
+ atomic_dec_if_positive(&ns->ctrl->nr_active);
+
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
@@ -291,10 +302,15 @@ static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
}
-static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
- int node, struct nvme_ns *old)
+static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns, *found = NULL;
+ int node = numa_node_id();
+ struct nvme_ns *old = srcu_dereference(head->current_path[node],
+ &head->srcu);
+
+ if (unlikely(!old))
+ return __nvme_find_path(head, node);
if (list_is_singular(&head->list)) {
if (nvme_path_is_disabled(old))
@@ -334,13 +350,49 @@ out:
return found;
}
+static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head)
+{
+ struct nvme_ns *best_opt = NULL, *best_nonopt = NULL, *ns;
+ unsigned int min_depth_opt = UINT_MAX, min_depth_nonopt = UINT_MAX;
+ unsigned int depth;
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (nvme_path_is_disabled(ns))
+ continue;
+
+ depth = atomic_read(&ns->ctrl->nr_active);
+
+ switch (ns->ana_state) {
+ case NVME_ANA_OPTIMIZED:
+ if (depth < min_depth_opt) {
+ min_depth_opt = depth;
+ best_opt = ns;
+ }
+ break;
+ case NVME_ANA_NONOPTIMIZED:
+ if (depth < min_depth_nonopt) {
+ min_depth_nonopt = depth;
+ best_nonopt = ns;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (min_depth_opt == 0)
+ return best_opt;
+ }
+
+ return best_opt ? best_opt : best_nonopt;
+}
+
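
The selector keeps two running minima so that an ANA-optimized path always
wins over a non-optimized one, and it returns early as soon as it finds an
idle optimized path (depth 0), since nothing can beat that. With optimized
paths at depth 3 and 5 and a non-optimized path at depth 1, for example, the
depth-3 optimized path is chosen. A toy userspace model of the same two-tier
minimum; the path data is made up:

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct path { const char *name; bool optimized; unsigned int depth; };

    int main(void)
    {
        struct path paths[] = {
            { "nvme0c0n1", true, 3 },
            { "nvme0c1n1", true, 5 },
            { "nvme0c2n1", false, 1 },
        };
        const struct path *best_opt = NULL, *best_nonopt = NULL;
        unsigned int min_opt = UINT_MAX, min_nonopt = UINT_MAX;

        for (int i = 0; i < 3; i++) {
            const struct path *p = &paths[i];

            if (p->optimized && p->depth < min_opt) {
                min_opt = p->depth;
                best_opt = p;
                if (min_opt == 0)
                    break;      /* idle optimized path, done */
            } else if (!p->optimized && p->depth < min_nonopt) {
                min_nonopt = p->depth;
                best_nonopt = p;
            }
        }
        printf("chose %s\n", (best_opt ? best_opt : best_nonopt)->name);
        return 0;
    }
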
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE &&
ns->ana_state == NVME_ANA_OPTIMIZED;
}
-inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head)
{
int node = numa_node_id();
struct nvme_ns *ns;
@@ -348,14 +400,23 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
ns = srcu_dereference(head->current_path[node], &head->srcu);
if (unlikely(!ns))
return __nvme_find_path(head, node);
-
- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR)
- return nvme_round_robin_path(head, node, ns);
if (unlikely(!nvme_path_is_optimized(ns)))
return __nvme_find_path(head, node);
return ns;
}
+inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
+{
+ switch (READ_ONCE(head->subsys->iopolicy)) {
+ case NVME_IOPOLICY_QD:
+ return nvme_queue_depth_path(head);
+ case NVME_IOPOLICY_RR:
+ return nvme_round_robin_path(head);
+ default:
+ return nvme_numa_path(head);
+ }
+}
+
static bool nvme_available_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns;
@@ -427,6 +488,21 @@ static void nvme_ns_head_release(struct gendisk *disk)
nvme_put_ns_head(disk->private_data);
}
+static int nvme_ns_head_get_unique_id(struct gendisk *disk, u8 id[16],
+ enum blk_unique_id type)
+{
+ struct nvme_ns_head *head = disk->private_data;
+ struct nvme_ns *ns;
+ int srcu_idx, ret = -EWOULDBLOCK;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (ns)
+ ret = nvme_ns_get_unique_id(ns, id, type);
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
#ifdef CONFIG_BLK_DEV_ZONED
static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
@@ -454,6 +530,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.ioctl = nvme_ns_head_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.getgeo = nvme_getgeo,
+ .get_unique_id = nvme_ns_head_get_unique_id,
.report_zones = nvme_ns_head_report_zones,
.pr_ops = &nvme_pr_ops,
};
@@ -521,7 +598,6 @@ static void nvme_requeue_work(struct work_struct *work)
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
struct queue_limits lim;
- bool vwc = false;
mutex_init(&head->lock);
bio_list_init(&head->requeue_list);
@@ -539,6 +615,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
blk_set_stacking_limits(&lim);
lim.dma_alignment = 3;
+ lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
if (head->ids.csi != NVME_CSI_ZNS)
lim.max_zone_append_sectors = 0;
@@ -549,24 +626,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
head->disk->private_data = head;
sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance);
-
- blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue);
- /*
- * This assumes all controllers that refer to a namespace either
- * support poll queues or not. That is not a strict guarantee,
- * but if the assumption is wrong the effect is only suboptimal
- * performance but not correctness problem.
- */
- if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL &&
- ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
- blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
-
- /* we need to propagate up the VMC settings */
- if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
- vwc = true;
- blk_queue_write_cache(head->disk->queue, vwc, vwc);
return 0;
}
@@ -803,6 +862,29 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
}
+static void nvme_subsys_iopolicy_update(struct nvme_subsystem *subsys,
+ int iopolicy)
+{
+ struct nvme_ctrl *ctrl;
+ int old_iopolicy = READ_ONCE(subsys->iopolicy);
+
+ if (old_iopolicy == iopolicy)
+ return;
+
+ WRITE_ONCE(subsys->iopolicy, iopolicy);
+
+ /* iopolicy changes clear the mpath by design */
+ mutex_lock(&nvme_subsystems_lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ nvme_mpath_clear_ctrl_paths(ctrl);
+ mutex_unlock(&nvme_subsystems_lock);
+
+ pr_notice("subsysnqn %s iopolicy changed from %s to %s\n",
+ subsys->subnqn,
+ nvme_iopolicy_names[old_iopolicy],
+ nvme_iopolicy_names[iopolicy]);
+}
+
static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
@@ -812,7 +894,7 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
- WRITE_ONCE(subsys->iopolicy, i);
+ nvme_subsys_iopolicy_update(subsys, i);
return count;
}
}
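
The policy can be switched per subsystem at runtime; nvme_subsys_iopolicy_update()
above drops the cached current_path pointers on every controller so the next
I/O re-runs path selection under the new policy. A sketch of flipping the
policy from userspace; the nvme-subsys0 instance name is an example and
depends on the system:

    /* Equivalent to:
     * echo queue-depth > /sys/class/nvme-subsystem/nvme-subsys0/iopolicy
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/sys/class/nvme-subsystem/nvme-subsys0/iopolicy",
                      O_WRONLY);

        if (fd < 0 || write(fd, "queue-depth", 11) != 11) {
            perror("iopolicy");
            return 1;
        }
        close(fd);
        return 0;
    }
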
@@ -875,9 +957,6 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
nvme_mpath_set_live(ns);
}
- if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
- ns->head->disk->queue);
#ifdef CONFIG_BLK_DEV_ZONED
if (blk_queue_is_zoned(ns->queue) && ns->head->disk)
ns->head->disk->nr_zones = ns->disk->nr_zones;
@@ -923,6 +1002,9 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
!(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
+ /* initialize this in the identify path to cover controller resets */
+ atomic_set(&ctrl->nr_active, 0);
+
if (!ctrl->max_namespaces ||
ctrl->max_namespaces > le32_to_cpu(id->nn)) {
dev_err(ctrl->device,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 68b400f9c42d..f900e44243ae 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -49,6 +49,7 @@ extern unsigned int admin_timeout;
extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
+extern struct mutex nvme_subsystems_lock;
/*
* List of workarounds for devices that required behavior not specified in
@@ -195,6 +196,7 @@ enum {
NVME_REQ_CANCELLED = (1 << 0),
NVME_REQ_USERCMD = (1 << 1),
NVME_MPATH_IO_STATS = (1 << 2),
+ NVME_MPATH_CNT_ACTIVE = (1 << 3),
};
static inline struct nvme_request *nvme_req(struct request *req)
@@ -360,6 +362,7 @@ struct nvme_ctrl {
size_t ana_log_size;
struct timer_list anatt_timer;
struct work_struct ana_work;
+ atomic_t nr_active;
#endif
#ifdef CONFIG_NVME_HOST_AUTH
@@ -408,6 +411,7 @@ static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,
+ NVME_IOPOLICY_QD,
};
struct nvme_subsystem {
@@ -551,6 +555,7 @@ struct nvme_ctrl_ops {
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
+ int (*subsystem_reset)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
@@ -649,18 +654,9 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
- int ret;
-
- if (!ctrl->subsystem)
+ if (!ctrl->subsystem || !ctrl->ops->subsystem_reset)
return -ENOTTY;
- if (!nvme_wait_reset(ctrl))
- return -EBUSY;
-
- ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
- if (ret)
- return ret;
-
- return nvme_try_sched_reset(ctrl);
+ return ctrl->ops->subsystem_reset(ctrl);
}
/*
@@ -689,7 +685,7 @@ static inline u32 nvme_bytes_to_numd(size_t len)
static inline bool nvme_is_ana_error(u16 status)
{
- switch (status & 0x7ff) {
+ switch (status & NVME_SCT_SC_MASK) {
case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE:
case NVME_SC_ANA_PERSISTENT_LOSS:
@@ -702,7 +698,7 @@ static inline bool nvme_is_ana_error(u16 status)
static inline bool nvme_is_path_error(u16 status)
{
/* check for a status code type of 'path related status' */
- return (status & 0x700) == 0x300;
+ return (status & NVME_SCT_MASK) == NVME_SCT_PATH;
}
/*
@@ -792,6 +788,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks);
+int nvme_add_ctrl(struct nvme_ctrl *ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
@@ -877,7 +874,7 @@ enum {
NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1),
/* Set BLK_MQ_REQ_RESERVED when allocating request */
NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2),
- /* Retry command when NVME_SC_DNR is not set in the result */
+ /* Retry command when NVME_STATUS_DNR is not set in the result */
NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3),
};
@@ -1062,6 +1059,9 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */
+int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
+ enum blk_unique_id type);
+
struct nvme_zone_info {
u64 zone_size;
unsigned int max_open_zones;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65f..21f8e1b9801f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -826,9 +826,9 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct bio_vec bv = rq_integrity_vec(req);
- iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
- rq_dma_dir(req), 0);
+ iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ rq_integrity_vec(req).bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))
@@ -1143,6 +1143,41 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
spin_unlock(&nvmeq->sq_lock);
}
+static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+ int ret = 0;
+
+ /*
+ * Taking the shutdown_lock ensures the BAR mapping is not being
+ * altered by reset_work. Holding this lock before the RESETTING state
+ * change, if successful, also ensures nvme_remove won't be able to
+ * proceed to iounmap until we're done.
+ */
+ mutex_lock(&dev->shutdown_lock);
+ if (!dev->bar_mapped_size) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ writel(NVME_SUBSYS_RESET, dev->bar + NVME_REG_NSSR);
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE);
+
+ /*
+ * Read controller status to flush the previous write and trigger a
+ * pcie read error.
+ */
+ readl(dev->bar + NVME_REG_CSTS);
+unlock:
+ mutex_unlock(&dev->shutdown_lock);
+ return ret;
+}
+
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
struct nvme_command c = { };
@@ -2859,6 +2894,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read64 = nvme_pci_reg_read64,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
+ .subsystem_reset = nvme_pci_subsystem_reset,
.get_address = nvme_pci_get_address,
.print_device_info = nvme_pci_print_device_info,
.supports_pci_p2pdma = nvme_pci_supports_pci_p2pdma,
@@ -3015,6 +3051,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(dev))
return PTR_ERR(dev);
+ result = nvme_add_ctrl(&dev->ctrl);
+ if (result)
+ goto out_put_ctrl;
+
result = nvme_dev_map(dev);
if (result)
goto out_uninit_ctrl;
@@ -3101,6 +3141,7 @@ out_dev_unmap:
nvme_dev_unmap(dev);
out_uninit_ctrl:
nvme_uninit_ctrl(&dev->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&dev->ctrl);
return result;
}
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 8fa1ffcdaed4..7347ddf85f00 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -72,12 +72,12 @@ static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
return nvme_submit_sync_cmd(ns->queue, c, data, data_len);
}
-static int nvme_sc_to_pr_err(int nvme_sc)
+static int nvme_status_to_pr_err(int status)
{
- if (nvme_is_path_error(nvme_sc))
+ if (nvme_is_path_error(status))
return PR_STS_PATH_FAILED;
- switch (nvme_sc & 0x7ff) {
+ switch (status & NVME_SCT_SC_MASK) {
case NVME_SC_SUCCESS:
return PR_STS_SUCCESS;
case NVME_SC_RESERVATION_CONFLICT:
@@ -121,7 +121,7 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
if (ret < 0)
return ret;
- return nvme_sc_to_pr_err(ret);
+ return nvme_status_to_pr_err(ret);
}
static int nvme_pr_register(struct block_device *bdev, u64 old,
@@ -196,7 +196,7 @@ retry:
if (ret < 0)
return ret;
- return nvme_sc_to_pr_err(ret);
+ return nvme_status_to_pr_err(ret);
}
static int nvme_pr_read_keys(struct block_device *bdev,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 51a62b0c645a..2eb33842f971 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2201,6 +2201,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
@@ -2237,12 +2238,11 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
-static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+static struct nvme_rdma_ctrl *nvme_rdma_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_rdma_ctrl *ctrl;
int ret;
- bool changed;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
@@ -2304,6 +2304,30 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ return ctrl;
+
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_free_ctrl:
+ kfree(ctrl);
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
+ struct nvmf_ctrl_options *opts)
+{
+ struct nvme_rdma_ctrl *ctrl;
+ bool changed;
+ int ret;
+
+ ctrl = nvme_rdma_alloc_ctrl(dev, opts);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
WARN_ON_ONCE(!changed);
@@ -2322,15 +2346,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
-out_kfree_queues:
- kfree(ctrl->queues);
-out_free_ctrl:
- kfree(ctrl);
- return ERR_PTR(ret);
}
static struct nvmf_transport_ops nvme_rdma_transport = {
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8b5e4327fe83..a2a47d3ab99f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2662,6 +2662,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
+ .subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_tcp_free_ctrl,
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,
@@ -2686,7 +2687,7 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
return found;
}
-static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
struct nvme_tcp_ctrl *ctrl;
@@ -2761,6 +2762,28 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
if (ret)
goto out_kfree_queues;
+ return ctrl;
+out_kfree_queues:
+ kfree(ctrl->queues);
+out_free_ctrl:
+ kfree(ctrl);
+ return ERR_PTR(ret);
+}
+
+static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
+ struct nvmf_ctrl_options *opts)
+{
+ struct nvme_tcp_ctrl *ctrl;
+ int ret;
+
+ ctrl = nvme_tcp_alloc_ctrl(dev, opts);
+ if (IS_ERR(ctrl))
+ return ERR_CAST(ctrl);
+
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
WARN_ON_ONCE(1);
ret = -EINTR;
@@ -2782,15 +2805,11 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
-out_kfree_queues:
- kfree(ctrl->queues);
-out_free_ctrl:
- kfree(ctrl);
- return ERR_PTR(ret);
}
static struct nvmf_transport_ops nvme_tcp_transport = {
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 77aa0f440a6d..9a06f9d98cd6 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -108,13 +108,12 @@ free_data:
void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
struct nvme_zone_info *zi)
{
- lim->zoned = 1;
+ lim->features |= BLK_FEAT_ZONED;
lim->max_open_zones = zi->max_open_zones;
lim->max_active_zones = zi->max_active_zones;
lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
lim->chunk_sectors = ns->head->zsze =
nvme_lba_to_sect(ns->head, zi->zone_size);
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
}
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 872dd1a0acd8..46be031f91b4 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -6,7 +6,6 @@ config NVME_TARGET
depends on CONFIGFS_FS
select NVME_KEYRING if NVME_TARGET_TCP_TLS
select KEYS if NVME_TARGET_TCP_TLS
- select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enables target-side support for the NVMe protocol, that is
@@ -18,6 +17,15 @@ config NVME_TARGET
To configure the NVMe target you probably want to use the nvmetcli
tool from http://git.infradead.org/users/hch/nvmetcli.git.
+config NVME_TARGET_DEBUGFS
+ bool "NVMe Target debugfs support"
+ depends on NVME_TARGET
+ help
+ This enables debugfs support to display the controllers connected
+ to each subsystem.
+
+ If unsure, say N.
+
config NVME_TARGET_PASSTHRU
bool "NVMe Target Passthrough support"
depends on NVME_TARGET
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index c66820102493..c402c44350b2 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
discovery.o io-cmd-file.o io-cmd-bdev.o
+nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvmet-$(CONFIG_NVME_TARGET_AUTH) += fabrics-cmd-auth.o auth.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index f5b7054a4a05..f7e1156ac7ec 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -344,7 +344,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
pr_debug("unhandled lid %d on qid %d\n",
req->cmd->get_log_page.lid, req->sq->qid);
req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
- nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_STATUS_DNR);
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
@@ -496,7 +496,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
req->error_loc = offsetof(struct nvme_identify, nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
goto out;
}
@@ -662,7 +662,7 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req)
if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
off) != NVME_IDENTIFY_DATA_SIZE - off)
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
out:
nvmet_req_complete(req, status);
@@ -724,7 +724,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
pr_debug("unhandled identify cns %d on qid %d\n",
req->cmd->identify.cns, req->sq->qid);
req->error_loc = offsetof(struct nvme_identify, cns);
- nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_STATUS_DNR);
}
/*
@@ -807,7 +807,7 @@ u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask)
if (val32 & ~mask) {
req->error_loc = offsetof(struct nvme_common_command, cdw11);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
@@ -833,7 +833,7 @@ void nvmet_execute_set_features(struct nvmet_req *req)
ncqr = (cdw11 >> 16) & 0xffff;
nsqr = cdw11 & 0xffff;
if (ncqr == 0xffff || nsqr == 0xffff) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
nvmet_set_result(req,
@@ -846,14 +846,14 @@ void nvmet_execute_set_features(struct nvmet_req *req)
status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL);
break;
case NVME_FEAT_HOST_ID:
- status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ status = NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
break;
case NVME_FEAT_WRITE_PROTECT:
status = nvmet_set_feat_write_protect(req);
break;
default:
req->error_loc = offsetof(struct nvme_common_command, cdw10);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -939,7 +939,7 @@ void nvmet_execute_get_features(struct nvmet_req *req)
if (!(req->cmd->common.cdw11 & cpu_to_le32(1 << 0))) {
req->error_loc =
offsetof(struct nvme_common_command, cdw11);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -952,7 +952,7 @@ void nvmet_execute_get_features(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -969,7 +969,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
mutex_lock(&ctrl->lock);
if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_STATUS_DNR);
return;
}
ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
@@ -1006,7 +1006,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
if (nvme_is_fabrics(cmd))
return nvmet_parse_fabrics_admin_cmd(req);
if (unlikely(!nvmet_check_auth_status(req)))
- return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+ return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
if (nvmet_is_disc_subsys(nvmet_req_subsys(req)))
return nvmet_parse_discovery_cmd(req);
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 7d2633940f9b..8bc3f431c77f 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -314,7 +314,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
req->sq->dhchap_c1,
challenge, shash_len);
if (ret)
- goto out_free_response;
+ goto out_free_challenge;
}
pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
@@ -325,7 +325,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
GFP_KERNEL);
if (!shash) {
ret = -ENOMEM;
- goto out_free_response;
+ goto out_free_challenge;
}
shash->tfm = shash_tfm;
ret = crypto_shash_init(shash);
@@ -361,9 +361,10 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
goto out;
ret = crypto_shash_final(shash, response);
out:
+ kfree(shash);
+out_free_challenge:
if (challenge != req->sq->dhchap_c1)
kfree(challenge);
- kfree(shash);
out_free_response:
nvme_auth_free_key(transformed_key);
out_free_tfm:
@@ -427,14 +428,14 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
req->sq->dhchap_c2,
challenge, shash_len);
if (ret)
- goto out_free_response;
+ goto out_free_challenge;
}
shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(shash_tfm),
GFP_KERNEL);
if (!shash) {
ret = -ENOMEM;
- goto out_free_response;
+ goto out_free_challenge;
}
shash->tfm = shash_tfm;
@@ -471,9 +472,10 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response,
goto out;
ret = crypto_shash_final(shash, response);
out:
+ kfree(shash);
+out_free_challenge:
if (challenge != req->sq->dhchap_c2)
kfree(challenge);
- kfree(shash);
out_free_response:
nvme_auth_free_key(transformed_key);
out_free_tfm:
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 4ff460ba2826..ed2424f8a396 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -16,6 +16,7 @@
#include "trace.h"
#include "nvmet.h"
+#include "debugfs.h"
struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
@@ -55,18 +56,18 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
return NVME_SC_SUCCESS;
case -ENOSPC:
req->error_loc = offsetof(struct nvme_rw_command, length);
- return NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ return NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
case -EREMOTEIO:
req->error_loc = offsetof(struct nvme_rw_command, slba);
- return NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
case -EOPNOTSUPP:
req->error_loc = offsetof(struct nvme_common_command, opcode);
switch (req->cmd->common.opcode) {
case nvme_cmd_dsm:
case nvme_cmd_write_zeroes:
- return NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
default:
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
break;
case -ENODATA:
@@ -76,7 +77,7 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
fallthrough;
default:
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INTERNAL | NVME_SC_DNR;
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
}
}
@@ -86,7 +87,7 @@ u16 nvmet_report_invalid_opcode(struct nvmet_req *req)
req->sq->qid);
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
@@ -97,7 +98,7 @@ u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
{
if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
}
return 0;
}
@@ -106,7 +107,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
}
return 0;
}
@@ -115,7 +116,7 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
}
return 0;
}
@@ -145,7 +146,7 @@ static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_STATUS_DNR);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
@@ -444,7 +445,7 @@ u16 nvmet_req_find_ns(struct nvmet_req *req)
req->error_loc = offsetof(struct nvme_common_command, nsid);
if (nvmet_subsys_nsid_exists(subsys, nsid))
return NVME_SC_INTERNAL_PATH_ERROR;
- return NVME_SC_INVALID_NS | NVME_SC_DNR;
+ return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
}
percpu_ref_get(&req->ns->ref);
@@ -904,7 +905,7 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return nvmet_parse_fabrics_io_cmd(req);
if (unlikely(!nvmet_check_auth_status(req)))
- return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+ return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
ret = nvmet_check_ctrl_status(req);
if (unlikely(ret))
@@ -967,7 +968,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
req->error_loc = offsetof(struct nvme_common_command, flags);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -978,7 +979,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
*/
if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
req->error_loc = offsetof(struct nvme_common_command, flags);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -996,7 +997,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
trace_nvmet_req_init(req, req->cmd);
if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto fail;
}
@@ -1023,7 +1024,7 @@ bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR);
return false;
}
@@ -1035,7 +1036,7 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
if (unlikely(data_len > req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
- nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
+ nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR);
return false;
}
@@ -1304,18 +1305,18 @@ u16 nvmet_check_ctrl_status(struct nvmet_req *req)
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
req->cmd->common.opcode, req->sq->qid);
- return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
req->cmd->common.opcode, req->sq->qid);
- return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
}
if (unlikely(!nvmet_check_auth_status(req))) {
pr_warn("qid %d not authenticated\n", req->sq->qid);
- return NVME_SC_AUTH_REQUIRED | NVME_SC_DNR;
+ return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
}
return 0;
}
@@ -1389,7 +1390,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
int ret;
u16 status;
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
subsys = nvmet_find_get_subsys(req->port, subsysnqn);
if (!subsys) {
pr_warn("connect request for invalid subsystem %s!\n",
@@ -1405,7 +1406,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
hostnqn, subsysnqn);
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
up_read(&nvmet_config_sem);
- status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_common_command, dptr);
goto out_put_subsystem;
}
@@ -1456,7 +1457,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
if (ret < 0) {
- status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
goto out_free_sqs;
}
ctrl->cntlid = ret;
@@ -1479,6 +1480,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
mutex_lock(&subsys->lock);
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
nvmet_setup_p2p_ns_map(ctrl, req);
+ nvmet_debugfs_ctrl_setup(ctrl);
mutex_unlock(&subsys->lock);
*ctrlp = ctrl;
@@ -1513,6 +1515,8 @@ static void nvmet_ctrl_free(struct kref *ref)
nvmet_destroy_auth(ctrl);
+ nvmet_debugfs_ctrl_free(ctrl);
+
ida_free(&cntlid_ida, ctrl->cntlid);
nvmet_async_events_free(ctrl);
@@ -1539,6 +1543,14 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
+ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ if (!ctrl->ops->host_traddr)
+ return -EOPNOTSUPP;
+ return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
+}
+
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn)
{
@@ -1633,8 +1645,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
INIT_LIST_HEAD(&subsys->ctrls);
INIT_LIST_HEAD(&subsys->hosts);
+ ret = nvmet_debugfs_subsys_setup(subsys);
+ if (ret)
+ goto free_subsysnqn;
+
return subsys;
+free_subsysnqn:
+ kfree(subsys->subsysnqn);
free_fr:
kfree(subsys->firmware_rev);
free_mn:
@@ -1651,6 +1669,8 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!xa_empty(&subsys->namespaces));
+ nvmet_debugfs_subsys_free(subsys);
+
xa_destroy(&subsys->namespaces);
nvmet_passthru_subsys_free(subsys);
@@ -1705,11 +1725,18 @@ static int __init nvmet_init(void)
if (error)
goto out_free_nvmet_work_queue;
- error = nvmet_init_configfs();
+ error = nvmet_init_debugfs();
if (error)
goto out_exit_discovery;
+
+ error = nvmet_init_configfs();
+ if (error)
+ goto out_exit_debugfs;
+
return 0;
+out_exit_debugfs:
+ nvmet_exit_debugfs();
out_exit_discovery:
nvmet_exit_discovery();
out_free_nvmet_work_queue:
@@ -1726,6 +1753,7 @@ out_destroy_bvec_cache:
static void __exit nvmet_exit(void)
{
nvmet_exit_configfs();
+ nvmet_exit_debugfs();
nvmet_exit_discovery();
ida_destroy(&cntlid_ida);
destroy_workqueue(nvmet_wq);
diff --git a/drivers/nvme/target/debugfs.c b/drivers/nvme/target/debugfs.c
new file mode 100644
index 000000000000..cb2befc8619e
--- /dev/null
+++ b/drivers/nvme/target/debugfs.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DebugFS interface for the NVMe target.
+ * Copyright (c) 2022-2024 Shadow
+ * Copyright (c) 2024 SUSE LLC
+ */
+
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include "nvmet.h"
+#include "debugfs.h"
+
+struct dentry *nvmet_debugfs;
+
+#define NVMET_DEBUGFS_ATTR(field) \
+ static int field##_open(struct inode *inode, struct file *file) \
+ { return single_open(file, field##_show, inode->i_private); } \
+ \
+ static const struct file_operations field##_fops = { \
+ .open = field##_open, \
+ .read = seq_read, \
+ .release = single_release, \
+ }
+
+#define NVMET_DEBUGFS_RW_ATTR(field) \
+ static int field##_open(struct inode *inode, struct file *file) \
+ { return single_open(file, field##_show, inode->i_private); } \
+ \
+ static const struct file_operations field##_fops = { \
+ .open = field##_open, \
+ .read = seq_read, \
+ .write = field##_write, \
+ .release = single_release, \
+ }
+
+static int nvmet_ctrl_hostnqn_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+
+ seq_puts(m, ctrl->hostnqn);
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_hostnqn);
+
+static int nvmet_ctrl_kato_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+
+ seq_printf(m, "%d\n", ctrl->kato);
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_kato);
+
+static int nvmet_ctrl_port_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+
+ seq_printf(m, "%d\n", le16_to_cpu(ctrl->port->disc_addr.portid));
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_port);
+
+static const char *const csts_state_names[] = {
+ [NVME_CSTS_RDY] = "ready",
+ [NVME_CSTS_CFS] = "fatal",
+ [NVME_CSTS_NSSRO] = "reset",
+ [NVME_CSTS_SHST_OCCUR] = "shutdown",
+ [NVME_CSTS_SHST_CMPLT] = "completed",
+ [NVME_CSTS_PP] = "paused",
+};
+
+static int nvmet_ctrl_state_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+ bool sep = false;
+ int i;
+
+ for (i = 0; i < 7; i++) {
+ int state = BIT(i);
+
+ if (!(ctrl->csts & state))
+ continue;
+ if (sep)
+ seq_puts(m, "|");
+ sep = true;
+ if (csts_state_names[state])
+ seq_puts(m, csts_state_names[state]);
+ else
+ seq_printf(m, "%d", state);
+ }
+ if (sep)
+ seq_printf(m, "\n");
+ return 0;
+}
+
+static ssize_t nvmet_ctrl_state_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *m = file->private_data;
+ struct nvmet_ctrl *ctrl = m->private;
+ char reset[16];
+
+ if (count >= sizeof(reset))
+ return -EINVAL;
+	if (copy_from_user(reset, buf, count))
+		return -EFAULT;
+	reset[count] = '\0';
+	if (!strncmp(reset, "fatal", 5))
+ nvmet_ctrl_fatal_error(ctrl);
+ else
+ return -EINVAL;
+ return count;
+}
+NVMET_DEBUGFS_RW_ATTR(nvmet_ctrl_state);
+
+static int nvmet_ctrl_host_traddr_show(struct seq_file *m, void *p)
+{
+ struct nvmet_ctrl *ctrl = m->private;
+ ssize_t size;
+ char buf[NVMF_TRADDR_SIZE + 1];
+
+ size = nvmet_ctrl_host_traddr(ctrl, buf, NVMF_TRADDR_SIZE);
+ if (size < 0) {
+ buf[0] = '\0';
+ size = 0;
+ }
+ buf[size] = '\0';
+ seq_printf(m, "%s\n", buf);
+ return 0;
+}
+NVMET_DEBUGFS_ATTR(nvmet_ctrl_host_traddr);
+
+int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
+{
+ char name[32];
+ struct dentry *parent = ctrl->subsys->debugfs_dir;
+ int ret;
+
+ if (!parent)
+ return -ENODEV;
+ snprintf(name, sizeof(name), "ctrl%d", ctrl->cntlid);
+ ctrl->debugfs_dir = debugfs_create_dir(name, parent);
+ if (IS_ERR(ctrl->debugfs_dir)) {
+ ret = PTR_ERR(ctrl->debugfs_dir);
+ ctrl->debugfs_dir = NULL;
+ return ret;
+ }
+ debugfs_create_file("port", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_port_fops);
+ debugfs_create_file("hostnqn", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_hostnqn_fops);
+ debugfs_create_file("kato", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_kato_fops);
+ debugfs_create_file("state", S_IRUSR | S_IWUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_state_fops);
+ debugfs_create_file("host_traddr", S_IRUSR, ctrl->debugfs_dir, ctrl,
+ &nvmet_ctrl_host_traddr_fops);
+ return 0;
+}
+
+void nvmet_debugfs_ctrl_free(struct nvmet_ctrl *ctrl)
+{
+ debugfs_remove_recursive(ctrl->debugfs_dir);
+}
+
+int nvmet_debugfs_subsys_setup(struct nvmet_subsys *subsys)
+{
+ int ret = 0;
+
+ subsys->debugfs_dir = debugfs_create_dir(subsys->subsysnqn,
+ nvmet_debugfs);
+ if (IS_ERR(subsys->debugfs_dir)) {
+ ret = PTR_ERR(subsys->debugfs_dir);
+ subsys->debugfs_dir = NULL;
+ }
+ return ret;
+}
+
+void nvmet_debugfs_subsys_free(struct nvmet_subsys *subsys)
+{
+ debugfs_remove_recursive(subsys->debugfs_dir);
+}
+
+int __init nvmet_init_debugfs(void)
+{
+ struct dentry *parent;
+
+ parent = debugfs_create_dir("nvmet", NULL);
+ if (IS_ERR(parent)) {
+ pr_warn("%s: failed to create debugfs directory\n", "nvmet");
+ return PTR_ERR(parent);
+ }
+ nvmet_debugfs = parent;
+ return 0;
+}
+
+void nvmet_exit_debugfs(void)
+{
+ debugfs_remove_recursive(nvmet_debugfs);
+}
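
The NVMET_DEBUGFS_ATTR()/NVMET_DEBUGFS_RW_ATTR() macros above wrap the
standard seq_file single-open idiom. As an illustration only (not part of
the patch), NVMET_DEBUGFS_ATTR(nvmet_ctrl_kato) expands to roughly:

	static int nvmet_ctrl_kato_open(struct inode *inode, struct file *file)
	{
		/* hand the dentry's private pointer (the nvmet_ctrl) to the show op */
		return single_open(file, nvmet_ctrl_kato_show, inode->i_private);
	}

	static const struct file_operations nvmet_ctrl_kato_fops = {
		.open    = nvmet_ctrl_kato_open,
		.read    = seq_read,
		.release = single_release,
	};

debugfs_create_file() stores the ctrl pointer in inode->i_private, which is
how each show routine recovers its controller.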
diff --git a/drivers/nvme/target/debugfs.h b/drivers/nvme/target/debugfs.h
new file mode 100644
index 000000000000..cfb8bbf6a297
--- /dev/null
+++ b/drivers/nvme/target/debugfs.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DebugFS interface for the NVMe target.
+ * Copyright (c) 2022-2024 Shadow
+ * Copyright (c) 2024 SUSE LLC
+ */
+#ifndef NVMET_DEBUGFS_H
+#define NVMET_DEBUGFS_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_NVME_TARGET_DEBUGFS
+int nvmet_debugfs_subsys_setup(struct nvmet_subsys *subsys);
+void nvmet_debugfs_subsys_free(struct nvmet_subsys *subsys);
+int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl);
+void nvmet_debugfs_ctrl_free(struct nvmet_ctrl *ctrl);
+
+int __init nvmet_init_debugfs(void);
+void nvmet_exit_debugfs(void);
+#else
+static inline int nvmet_debugfs_subsys_setup(struct nvmet_subsys *subsys)
+{
+ return 0;
+}
+static inline void nvmet_debugfs_subsys_free(struct nvmet_subsys *subsys){}
+
+static inline int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl)
+{
+ return 0;
+}
+static inline void nvmet_debugfs_ctrl_free(struct nvmet_ctrl *ctrl) {}
+
+static inline int __init nvmet_init_debugfs(void)
+{
+ return 0;
+}
+
+static inline void nvmet_exit_debugfs(void) {}
+
+#endif
+
+#endif /* NVMET_DEBUGFS_H */
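
The #else branch supplies empty static inline stubs, the usual kernel
pattern for optional features: core code calls the hooks unconditionally
and, with CONFIG_NVME_TARGET_DEBUGFS=n, the compiler folds them away. A
minimal sketch of a call site:

	error = nvmet_init_debugfs();	/* stub returns 0 when the option is off */
	if (error)			/* ...so this branch is eliminated */
		goto out_exit_discovery;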
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index ce54da8c6b36..28843df5fa7c 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -179,7 +179,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
req->error_loc =
offsetof(struct nvme_get_log_page_command, lid);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -187,7 +187,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
if (offset & 0x3) {
req->error_loc =
offsetof(struct nvme_get_log_page_command, lpo);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -256,7 +256,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
req->error_loc = offsetof(struct nvme_identify, cns);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -320,7 +320,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
- stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ stat = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -345,7 +345,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
- stat = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ stat = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
@@ -361,7 +361,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
cmd->common.opcode);
req->error_loc =
offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
switch (cmd->common.opcode) {
@@ -386,7 +386,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
default:
pr_debug("unhandled cmd %d\n", cmd->common.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
}
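
The NVME_SC_DNR -> NVME_STATUS_DNR changes here and in the files below are
mechanical: DNR ("Do Not Retry") is a flag bit in the NVMe completion
status word rather than a status code, and the series renames the constant
to say so. A sketch of the composition (values are illustrative; the
authoritative definitions live in include/linux/nvme.h):

	u16 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; /* code | flag bit */

	if (status & NVME_STATUS_DNR)
		; /* the host must not retry this command */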
diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c
index cb34d644ed08..3f2857c17d95 100644
--- a/drivers/nvme/target/fabrics-cmd-auth.c
+++ b/drivers/nvme/target/fabrics-cmd-auth.c
@@ -189,26 +189,26 @@ void nvmet_execute_auth_send(struct nvmet_req *req)
u8 dhchap_status;
if (req->cmd->auth_send.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, secp);
goto done;
}
if (req->cmd->auth_send.spsp0 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, spsp0);
goto done;
}
if (req->cmd->auth_send.spsp1 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, spsp1);
goto done;
}
tl = le32_to_cpu(req->cmd->auth_send.tl);
if (!tl) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_send_command, tl);
goto done;
@@ -437,26 +437,26 @@ void nvmet_execute_auth_receive(struct nvmet_req *req)
u16 status = 0;
if (req->cmd->auth_receive.secp != NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, secp);
goto done;
}
if (req->cmd->auth_receive.spsp0 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, spsp0);
goto done;
}
if (req->cmd->auth_receive.spsp1 != 0x01) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, spsp1);
goto done;
}
al = le32_to_cpu(req->cmd->auth_receive.al);
if (!al) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc =
offsetof(struct nvmf_auth_receive_command, al);
goto done;
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 69d77d34bec1..c4b2eddd5666 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -18,7 +18,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
if (req->cmd->prop_set.attrib & 1) {
req->error_loc =
offsetof(struct nvmf_property_set_command, attrib);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -29,7 +29,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvmf_property_set_command, offset);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
out:
nvmet_req_complete(req, status);
@@ -50,7 +50,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
val = ctrl->cap;
break;
default:
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
} else {
@@ -65,7 +65,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
val = ctrl->csts;
break;
default:
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
}
}
@@ -105,7 +105,7 @@ u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req)
pr_debug("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
return 0;
@@ -128,7 +128,7 @@ u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req)
pr_debug("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
return 0;
@@ -147,14 +147,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
- ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
goto err;
}
if (ctrl->sqs[qid] != NULL) {
pr_warn("qid %u has already been created\n", qid);
req->error_loc = offsetof(struct nvmf_connect_command, qid);
- return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ return NVME_SC_CMD_SEQ_ERROR | NVME_STATUS_DNR;
}
/* for fabrics, this value applies to only the I/O Submission Queues */
@@ -163,14 +163,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
sqsize, mqes, ctrl->cntlid);
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
- return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ return NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
}
old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
if (old) {
pr_warn("queue already connected!\n");
req->error_loc = offsetof(struct nvmf_connect_command, opcode);
- return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+ return NVME_SC_CONNECT_CTRL_BUSY | NVME_STATUS_DNR;
}
/* note: convert queue size from 0's-based value to 1's-based value */
@@ -230,14 +230,14 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
pr_warn("invalid connect version (%d).\n",
le16_to_cpu(c->recfmt));
req->error_loc = offsetof(struct nvmf_connect_command, recfmt);
- status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_FORMAT | NVME_STATUS_DNR;
goto out;
}
if (unlikely(d->cntlid != cpu_to_le16(0xffff))) {
pr_warn("connect attempt for invalid controller ID %#x\n",
d->cntlid);
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
goto out;
}
@@ -257,7 +257,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
dhchap_status);
nvmet_ctrl_put(ctrl);
if (dhchap_status == NVME_AUTH_DHCHAP_FAILURE_FAILED)
- status = (NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR);
+ status = (NVME_SC_CONNECT_INVALID_HOST | NVME_STATUS_DNR);
else
status = NVME_SC_INTERNAL;
goto out;
@@ -305,7 +305,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
if (c->recfmt != 0) {
pr_warn("invalid connect version (%d).\n",
le16_to_cpu(c->recfmt));
- status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_FORMAT | NVME_STATUS_DNR;
goto out;
}
@@ -314,13 +314,13 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
le16_to_cpu(d->cntlid), req);
if (!ctrl) {
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
goto out;
}
if (unlikely(qid > ctrl->subsys->max_qid)) {
pr_warn("invalid queue id (%d)\n", qid);
- status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ status = NVME_SC_CONNECT_INVALID_PARAM | NVME_STATUS_DNR;
req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
goto out_ctrl_put;
}
@@ -350,13 +350,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
pr_debug("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
pr_debug("invalid capsule type 0x%x on unconnected queue.\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
if (cmd->connect.qid == 0)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 381b4394731f..3ef4beacde32 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2934,6 +2934,38 @@ nvmet_fc_discovery_chg(struct nvmet_port *port)
tgtport->ops->discovery_event(&tgtport->fc_target_port);
}
+static ssize_t
+nvmet_fc_host_traddr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_size)
+{
+ struct nvmet_sq *sq = ctrl->sqs[0];
+ struct nvmet_fc_tgt_queue *queue =
+ container_of(sq, struct nvmet_fc_tgt_queue, nvme_sq);
+ struct nvmet_fc_tgtport *tgtport = queue->assoc ? queue->assoc->tgtport : NULL;
+ struct nvmet_fc_hostport *hostport = queue->assoc ? queue->assoc->hostport : NULL;
+ u64 wwnn, wwpn;
+ ssize_t ret = 0;
+
+ if (!tgtport || !nvmet_fc_tgtport_get(tgtport))
+ return -ENODEV;
+ if (!hostport || !nvmet_fc_hostport_get(hostport)) {
+ ret = -ENODEV;
+ goto out_put;
+ }
+
+ if (tgtport->ops->host_traddr) {
+ ret = tgtport->ops->host_traddr(hostport->hosthandle, &wwnn, &wwpn);
+ if (ret)
+ goto out_put_host;
+ ret = snprintf(traddr, traddr_size, "nn-0x%llx:pn-0x%llx", wwnn, wwpn);
+ }
+out_put_host:
+ nvmet_fc_hostport_put(hostport);
+out_put:
+ nvmet_fc_tgtport_put(tgtport);
+ return ret;
+}
+
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_FC,
@@ -2943,6 +2975,7 @@ static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
.queue_response = nvmet_fc_fcp_nvme_cmd_done,
.delete_ctrl = nvmet_fc_delete_ctrl,
.discovery_chg = nvmet_fc_discovery_chg,
+ .host_traddr = nvmet_fc_host_traddr,
};
static int __init nvmet_fc_init_module(void)
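
For FC, the new host_traddr callback reports the remote (host) port as a
WWNN/WWPN pair. A sketch of the string nvmet_fc_host_traddr() builds, with
made-up example values:

	u64 wwnn = 0x20000090fa942779ULL;	/* hypothetical node name */
	u64 wwpn = 0x10000090fa942779ULL;	/* hypothetical port name */
	char traddr[NVMF_TRADDR_SIZE];

	snprintf(traddr, sizeof(traddr), "nn-0x%llx:pn-0x%llx", wwnn, wwpn);
	/* -> "nn-0x20000090fa942779:pn-0x10000090fa942779" */

which matches the traddr format FC already uses elsewhere in the NVMe
fabrics code.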
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 913cd2ec7a6f..e1abb27927ff 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -492,6 +492,16 @@ fcloop_t2h_host_release(void *hosthandle)
/* host handle ignored for now */
}
+static int
+fcloop_t2h_host_traddr(void *hosthandle, u64 *wwnn, u64 *wwpn)
+{
+ struct fcloop_rport *rport = hosthandle;
+
+ *wwnn = rport->lport->localport->node_name;
+ *wwpn = rport->lport->localport->port_name;
+ return 0;
+}
+
/*
 * Simulate reception of an RSCN and convert it to an initiator transport
* call to rescan a remote port.
@@ -1074,6 +1084,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.ls_req = fcloop_t2h_ls_req,
.ls_abort = fcloop_t2h_ls_abort,
.host_release = fcloop_t2h_host_release,
+ .host_traddr = fcloop_t2h_host_traddr,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 6426aac2634a..0bda83d0fc3e 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -61,15 +61,17 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
- if (bi) {
+ if (!bi)
+ return;
+
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) {
ns->metadata_size = bi->tuple_size;
- if (bi->profile == &t10_pi_type1_crc)
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
ns->pi_type = NVME_NS_DPS_PI_TYPE1;
- else if (bi->profile == &t10_pi_type3_crc)
- ns->pi_type = NVME_NS_DPS_PI_TYPE3;
else
- /* Unsupported metadata type */
- ns->metadata_size = 0;
+ ns->pi_type = NVME_NS_DPS_PI_TYPE3;
+ } else {
+ ns->metadata_size = 0;
}
}
@@ -102,7 +104,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
ns->pi_type = 0;
ns->metadata_size = 0;
- if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
nvmet_bdev_ns_enable_integrity(ns);
if (bdev_is_zoned(ns->bdev)) {
@@ -135,11 +137,11 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
*/
switch (blk_sts) {
case BLK_STS_NOSPC:
- status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
+ status = NVME_SC_CAP_EXCEEDED | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_rw_command, length);
break;
case BLK_STS_TARGET:
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_rw_command, slba);
break;
case BLK_STS_NOTSUPP:
@@ -147,10 +149,10 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
switch (req->cmd->common.opcode) {
case nvme_cmd_dsm:
case nvme_cmd_write_zeroes:
- status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
+ status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR;
break;
default:
- status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
break;
case BLK_STS_MEDIUM:
@@ -159,7 +161,7 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
break;
case BLK_STS_IOERR:
default:
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_common_command, opcode);
}
@@ -356,7 +358,7 @@ u16 nvmet_bdev_flush(struct nvmet_req *req)
return 0;
if (blkdev_issue_flush(req->ns->bdev))
- return NVME_SC_INTERNAL | NVME_SC_DNR;
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
return 0;
}
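
The rewritten nvmet_bdev_ns_enable_integrity() keys off the block layer's
csum_type and flags fields instead of comparing blk_integrity profile
pointers. The resulting mapping, summarized as a sketch:

	/*
	 * bi->csum_type == BLK_INTEGRITY_CSUM_CRC:
	 *	flags & BLK_INTEGRITY_REF_TAG	-> NVME_NS_DPS_PI_TYPE1
	 *	otherwise			-> NVME_NS_DPS_PI_TYPE3
	 * any other csum_type			-> PI disabled (metadata_size = 0)
	 */

Only T10 CRC protection maps to NVMe PI; the presence of a reference tag
distinguishes Type 1 from Type 3.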
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e589915ddef8..e32790d8fc26 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -555,6 +555,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
goto out;
}
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
WARN_ON_ONCE(1);
@@ -611,6 +615,7 @@ out_free_queues:
kfree(ctrl->queues);
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
out:
if (ret > 0)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 2f22b07eab29..190f55e6d753 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -230,7 +230,9 @@ struct nvmet_ctrl {
struct device *p2p_client;
struct radix_tree_root p2p_ns_map;
-
+#ifdef CONFIG_NVME_TARGET_DEBUGFS
+ struct dentry *debugfs_dir;
+#endif
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
@@ -262,7 +264,9 @@ struct nvmet_subsys {
struct list_head hosts;
bool allow_any_host;
-
+#ifdef CONFIG_NVME_TARGET_DEBUGFS
+ struct dentry *debugfs_dir;
+#endif
u16 max_qid;
u64 ver;
@@ -350,6 +354,8 @@ struct nvmet_fabrics_ops {
void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
void (*disc_traddr)(struct nvmet_req *req,
struct nvmet_port *port, char *traddr);
+ ssize_t (*host_traddr)(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len);
u16 (*install_queue)(struct nvmet_sq *nvme_sq);
void (*discovery_chg)(struct nvmet_port *port);
u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
@@ -498,6 +504,8 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
struct nvmet_req *req);
void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
u16 nvmet_check_ctrl_status(struct nvmet_req *req);
+ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len);
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
enum nvme_subsys_type type);
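
The nvmet_ctrl_host_traddr() declaration pairs with the new ->host_traddr
fabrics op. The core-side helper is not shown in this hunk; assuming it
follows the pattern of the other per-transport ops, it likely amounts to:

	/* sketch under that assumption; the real helper may differ in detail */
	ssize_t nvmet_ctrl_host_traddr(struct nvmet_ctrl *ctrl,
				       char *traddr, size_t traddr_len)
	{
		if (!ctrl->ops->host_traddr)
			return -EOPNOTSUPP;
		return ctrl->ops->host_traddr(ctrl, traddr, traddr_len);
	}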
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index f003782d4ecf..24d0e2418d2e 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -306,7 +306,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
ns = nvme_find_get_ns(ctrl, nsid);
if (unlikely(!ns)) {
pr_err("failed to get passthru ns nsid:%u\n", nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
goto out;
}
@@ -426,7 +426,7 @@ u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
* emulated in the future if regular targets grow support for
* this feature.
*/
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
return nvmet_setup_passthru_command(req);
@@ -478,7 +478,7 @@ static u16 nvmet_passthru_get_set_features(struct nvmet_req *req)
case NVME_FEAT_RESV_PERSIST:
/* No reservations, see nvmet_parse_passthru_io_cmd() */
default:
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
}
}
@@ -546,7 +546,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
case NVME_ID_CNS_NS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
@@ -558,7 +558,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
default:
return nvmet_setup_passthru_command(req);
}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 689bb5d3cfdc..1eff8ca6a5f1 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -852,12 +852,12 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
if (!nvme_is_write(rsp->req.cmd)) {
rsp->req.error_loc =
offsetof(struct nvme_common_command, opcode);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
if (off + len > rsp->queue->dev->inline_data_size) {
pr_err("invalid inline data offset!\n");
- return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR;
}
/* no data command? */
@@ -919,7 +919,7 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
pr_err("invalid SGL subtype: %#x\n", sgl->type);
rsp->req.error_loc =
offsetof(struct nvme_common_command, dptr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
case NVME_KEY_SGL_FMT_DATA_DESC:
switch (sgl->type & 0xf) {
@@ -931,12 +931,12 @@ static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
pr_err("invalid SGL subtype: %#x\n", sgl->type);
rsp->req.error_loc =
offsetof(struct nvme_common_command, dptr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
default:
pr_err("invalid SGL type: %#x\n", sgl->type);
rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
- return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_TYPE | NVME_STATUS_DNR;
}
}
@@ -2000,6 +2000,17 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
}
}
+static ssize_t nvmet_rdma_host_port_addr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ struct nvmet_sq *nvme_sq = ctrl->sqs[0];
+ struct nvmet_rdma_queue *queue =
+ container_of(nvme_sq, struct nvmet_rdma_queue, nvme_sq);
+
+ return snprintf(traddr, traddr_len, "%pISc",
+ (struct sockaddr *)&queue->cm_id->route.addr.dst_addr);
+}
+
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
if (ctrl->pi_support)
@@ -2024,6 +2035,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr,
+ .host_traddr = nvmet_rdma_host_port_addr,
.get_mdts = nvmet_rdma_get_mdts,
.get_max_queue_size = nvmet_rdma_get_max_queue_size,
};
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 380f22ee3ebb..5bff0d5464d1 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -416,10 +416,10 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
NVME_SGL_FMT_OFFSET)) {
if (!nvme_is_write(cmd->req.cmd))
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
if (len > cmd->req.port->inline_data_size)
- return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
+ return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR;
cmd->pdu_len = len;
}
cmd->req.transfer_len += len;
@@ -2167,6 +2167,19 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
}
}
+static ssize_t nvmet_tcp_host_port_addr(struct nvmet_ctrl *ctrl,
+ char *traddr, size_t traddr_len)
+{
+ struct nvmet_sq *sq = ctrl->sqs[0];
+ struct nvmet_tcp_queue *queue =
+ container_of(sq, struct nvmet_tcp_queue, nvme_sq);
+
+ if (queue->sockaddr_peer.ss_family == AF_UNSPEC)
+ return -EINVAL;
+ return snprintf(traddr, traddr_len, "%pISc",
+ (struct sockaddr *)&queue->sockaddr_peer);
+}
+
static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_TCP,
@@ -2177,6 +2190,7 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.delete_ctrl = nvmet_tcp_delete_ctrl,
.install_queue = nvmet_tcp_install_queue,
.disc_traddr = nvmet_tcp_disc_port_addr,
+ .host_traddr = nvmet_tcp_host_port_addr,
};
static int __init nvmet_tcp_init(void)
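
Both the RDMA and TCP host_traddr implementations format the peer address
with the kernel's %pISc extension, which renders a struct sockaddr as a
readable IP ('c' selects compressed IPv6 notation). With hypothetical
peers, for illustration:

	snprintf(traddr, traddr_len, "%pISc", (struct sockaddr *)&peer);
	/* AF_INET:  "192.168.1.20" */
	/* AF_INET6: "fd00::1"      */

The TCP variant additionally returns -EINVAL when no peer address was
recorded (ss_family == AF_UNSPEC).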
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 0021d06041c1..af9e13be7678 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -100,7 +100,7 @@ void nvmet_execute_identify_ns_zns(struct nvmet_req *req)
if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
req->error_loc = offsetof(struct nvme_identify, nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ status = NVME_SC_INVALID_NS | NVME_STATUS_DNR;
goto out;
}
@@ -121,7 +121,7 @@ void nvmet_execute_identify_ns_zns(struct nvmet_req *req)
}
if (!bdev_is_zoned(req->ns->bdev)) {
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
req->error_loc = offsetof(struct nvme_identify, nsid);
goto out;
}
@@ -158,17 +158,17 @@ static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba);
- return NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ return NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
}
if (out_bufsize < sizeof(struct nvme_zone_report)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
switch (req->cmd->zmr.pr) {
@@ -177,7 +177,7 @@ static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
break;
default:
req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
switch (req->cmd->zmr.zrasf) {
@@ -193,7 +193,7 @@ static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
default:
req->error_loc =
offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
return NVME_SC_SUCCESS;
@@ -341,7 +341,7 @@ static u16 blkdev_zone_mgmt_errno_to_nvme_status(int ret)
return NVME_SC_SUCCESS;
case -EINVAL:
case -EIO:
- return NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
+ return NVME_SC_ZONE_INVALID_TRANSITION | NVME_STATUS_DNR;
default:
return NVME_SC_INTERNAL;
}
@@ -463,7 +463,7 @@ static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req)
default:
/* this is needed to quiet compiler warning */
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
- return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}
return NVME_SC_SUCCESS;
@@ -481,7 +481,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
if (op == REQ_OP_LAST) {
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
- status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
+ status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_STATUS_DNR;
goto out;
}
@@ -493,13 +493,13 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
if (sect >= get_capacity(bdev->bd_disk)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
goto out;
}
if (sect & (zone_sectors - 1)) {
req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -551,13 +551,13 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
req->error_loc = offsetof(struct nvme_rw_command, slba);
- status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
+ status = NVME_SC_LBA_RANGE | NVME_STATUS_DNR;
goto out;
}
if (sect & (bdev_zone_sectors(req->ns->bdev) - 1)) {
req->error_loc = offsetof(struct nvme_rw_command, slba);
- status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
goto out;
}
@@ -590,7 +590,7 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
}
if (total_len != nvmet_rw_data_len(req)) {
- status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
goto out_put_bio;
}
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 4533dd055ca8..1aa426b1dedd 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -68,7 +68,6 @@ int dasd_gendisk_alloc(struct dasd_block *block)
blk_mq_free_tag_set(&block->tag_set);
return PTR_ERR(gdp);
}
- blk_queue_flag_set(QUEUE_FLAG_NONROT, gdp->queue);
/* Initialize gendisk structure. */
gdp->major = DASD_MAJOR;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6d1689a2717e..d5a5d11ae0dc 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -548,6 +548,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
{
struct queue_limits lim = {
.logical_block_size = 4096,
+ .features = BLK_FEAT_DAX,
};
int rc, i, j, num_of_segments;
struct dcssblk_dev_info *dev_info;
@@ -643,7 +644,6 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
dev_info->gd->fops = &dcssblk_devops;
dev_info->gd->private_data = dev_info;
dev_info->gd->flags |= GENHD_FL_NO_PART;
- blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue);
seg_byte_size = (dev_info->end - dev_info->start + 1);
set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 1d456a5a3bfb..3fcfe029db1b 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -439,7 +439,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
.logical_block_size = 1 << 12,
};
unsigned int devindex;
- struct request_queue *rq;
int len, ret;
lim.max_segments = min(scmdev->nr_max_block,
@@ -474,10 +473,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
ret = PTR_ERR(bdev->gendisk);
goto out_tag;
}
- rq = bdev->rq = bdev->gendisk->queue;
- blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);
-
bdev->gendisk->private_data = scmdev;
bdev->gendisk->fops = &scm_blk_devops;
bdev->gendisk->major = scm_major;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 065db86d6021..37c24ffea65c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -82,7 +82,6 @@ comment "SCSI support type (disk, tape, CD-ROM)"
config BLK_DEV_SD
tristate "SCSI disk support"
depends on SCSI
- select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
help
If you want to use SCSI hard disks, Fibre Channel disks,
Serial ATA (SATA) or Parallel ATA (PATA) hard disks,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 60688f18fac6..c708e1059638 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -1057,15 +1057,15 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
return 0;
}
-static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
+static int iscsi_sw_tcp_device_configure(struct scsi_device *sdev,
+ struct queue_limits *lim)
{
struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host);
struct iscsi_session *session = tcp_sw_host->session;
struct iscsi_conn *conn = session->leadconn;
if (conn->datadgst_en)
- blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
- sdev->request_queue);
+ lim->features |= BLK_FEAT_STABLE_WRITES;
return 0;
}
@@ -1083,7 +1083,7 @@ static const struct scsi_host_template iscsi_sw_tcp_sht = {
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.dma_boundary = PAGE_SIZE - 1,
- .slave_configure = iscsi_sw_tcp_slave_configure,
+ .device_configure = iscsi_sw_tcp_device_configure,
.proc_name = "iscsi_tcp",
.this_id = -1,
.track_queue_depth = 1,
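
iscsi_tcp moves from ->slave_configure to ->device_configure, the
queue_limits-aware variant: rather than poking QUEUE_FLAG_STABLE_WRITES
into the live request queue, the driver sets the feature in the limits the
SCSI midlayer is about to commit atomically. The same conversion pattern,
as a sketch:

	static int my_device_configure(struct scsi_device *sdev,
				       struct queue_limits *lim)
	{
		/* declare queue features up front instead of flipping flags later */
		lim->features |= BLK_FEAT_STABLE_WRITES;
		return 0;
	}

The megaraid_sas and mpt3sas hunks below drop their QUEUE_FLAG_NOMERGES
tweaks as part of the same cleanup.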
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 5297cacc8beb..0cef5d089f34 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1363,6 +1363,16 @@ lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport,
atomic_inc(&lpfc_nvmet->xmt_ls_abort);
}
+static int
+lpfc_nvmet_host_traddr(void *hosthandle, u64 *wwnn, u64 *wwpn)
+{
+ struct lpfc_nodelist *ndlp = hosthandle;
+
+ *wwnn = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
+ *wwpn = wwn_to_u64(ndlp->nlp_portname.u.wwn);
+ return 0;
+}
+
static void
lpfc_nvmet_host_release(void *hosthandle)
{
@@ -1413,6 +1423,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
.ls_req = lpfc_nvmet_ls_req,
.ls_abort = lpfc_nvmet_ls_abort,
.host_release = lpfc_nvmet_host_release,
+ .host_traddr = lpfc_nvmet_host_traddr,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 88acefbf9aea..6c79c350a4d5 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1981,8 +1981,6 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev,
lim->max_hw_sectors = max_io_size / 512;
lim->virt_boundary_mask = mr_nvme_pg_size - 1;
-
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue);
}
/*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 870ec2cb4af4..97c2472cd434 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2680,12 +2680,6 @@ scsih_device_configure(struct scsi_device *sdev, struct queue_limits *lim)
pcie_device_put(pcie_device);
spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
mpt3sas_scsih_change_queue_depth(sdev, qdepth);
- /* Enable QUEUE_FLAG_NOMERGES flag, so that IOs won't be
- ** merged and can eliminate holes created during merging
- ** operation.
- **/
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES,
- sdev->request_queue);
lim->virt_boundary_mask = ioc->page_size - 1;
return 0;
}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 91f022fb8d0c..a9d8a9c62663 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -69,6 +69,8 @@ static const char *sdebug_version_date = "20210520";
/* Additional Sense Code (ASC) */
#define NO_ADDITIONAL_SENSE 0x0
+#define OVERLAP_ATOMIC_COMMAND_ASC 0x0
+#define OVERLAP_ATOMIC_COMMAND_ASCQ 0x23
#define LOGICAL_UNIT_NOT_READY 0x4
#define LOGICAL_UNIT_COMMUNICATION_FAILURE 0x8
#define UNRECOVERED_READ_ERR 0x11
@@ -103,6 +105,7 @@ static const char *sdebug_version_date = "20210520";
#define READ_BOUNDARY_ASCQ 0x7
#define ATTEMPT_ACCESS_GAP 0x9
#define INSUFF_ZONE_ASCQ 0xe
+/* see drivers/scsi/sense_codes.h */
/* Additional Sense Code Qualifier (ASCQ) */
#define ACK_NAK_TO 0x3
@@ -152,6 +155,12 @@ static const char *sdebug_version_date = "20210520";
#define DEF_VIRTUAL_GB 0
#define DEF_VPD_USE_HOSTNO 1
#define DEF_WRITESAME_LENGTH 0xFFFF
+#define DEF_ATOMIC_WR 0
+#define DEF_ATOMIC_WR_MAX_LENGTH 8192
+#define DEF_ATOMIC_WR_ALIGN 2
+#define DEF_ATOMIC_WR_GRAN 2
+#define DEF_ATOMIC_WR_MAX_LENGTH_BNDRY (DEF_ATOMIC_WR_MAX_LENGTH)
+#define DEF_ATOMIC_WR_MAX_BNDRY 128
#define DEF_STRICT 0
#define DEF_STATISTICS false
#define DEF_SUBMIT_QUEUES 1
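
The DEF_ATOMIC_WR* values above set the defaults for the sdebug_atomic_wr*
variables added further down. Assuming scsi_debug's usual convention of
exposing each sdebug_<name> variable as a module parameter named <name>
(the module_param_named() lines fall outside this hunk):

	/* hypothetical usage; parameter names inferred from the variables:
	 *   modprobe scsi_debug atomic_wr=1 atomic_wr_max_length=8192
	 * with the remaining knobs controlling the advertised alignment,
	 * granularity and boundary limits.
	 */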
@@ -374,7 +383,9 @@ struct sdebug_host_info {
/* There is an xarray of pointers to this struct's objects, one per host */
struct sdeb_store_info {
- rwlock_t macc_lck; /* for atomic media access on this store */
+ rwlock_t macc_data_lck; /* for media data access on this store */
+ rwlock_t macc_meta_lck; /* for atomic media meta access on this store */
+ rwlock_t macc_sector_lck; /* per-sector media data access on this store */
u8 *storep; /* user data storage (ram) */
struct t10_pi_tuple *dif_storep; /* protection info */
void *map_storep; /* provisioning map */
@@ -398,12 +409,20 @@ struct sdebug_defer {
enum sdeb_defer_type defer_t;
};
+struct sdebug_device_access_info {
+ bool atomic_write;
+ u64 lba;
+ u32 num;
+ struct scsi_cmnd *self;
+};
+
struct sdebug_queued_cmd {
/* corresponding bit set in in_use_bm[] in owning struct sdebug_queue
* instance indicates this slot is in use.
*/
struct sdebug_defer sd_dp;
struct scsi_cmnd *scmd;
+ struct sdebug_device_access_info *i;
};
struct sdebug_scsi_cmd {
@@ -463,7 +482,8 @@ enum sdeb_opcode_index {
SDEB_I_PRE_FETCH = 29, /* 10, 16 */
SDEB_I_ZONE_OUT = 30, /* 0x94+SA; includes no data xfer */
SDEB_I_ZONE_IN = 31, /* 0x95+SA; all have data-in */
- SDEB_I_LAST_ELEM_P1 = 32, /* keep this last (previous + 1) */
+ SDEB_I_ATOMIC_WRITE_16 = 32,
+ SDEB_I_LAST_ELEM_P1 = 33, /* keep this last (previous + 1) */
};
@@ -497,7 +517,8 @@ static const unsigned char opcode_ind_arr[256] = {
0, 0, 0, SDEB_I_VERIFY,
SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, SDEB_I_WRITE_SAME,
SDEB_I_ZONE_OUT, SDEB_I_ZONE_IN, 0, 0,
- 0, 0, 0, 0, 0, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16,
+ 0, 0, 0, 0,
+ SDEB_I_ATOMIC_WRITE_16, 0, SDEB_I_SERV_ACT_IN_16, SDEB_I_SERV_ACT_OUT_16,
/* 0xa0; 0xa0->0xbf: 12 byte cdbs */
SDEB_I_REPORT_LUNS, SDEB_I_ATA_PT, 0, SDEB_I_MAINT_IN,
SDEB_I_MAINT_OUT, 0, 0, 0,
@@ -547,6 +568,7 @@ static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_pre_fetch(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_report_zones(struct scsi_cmnd *, struct sdebug_dev_info *);
+static int resp_atomic_write(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_open_zone(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_close_zone(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_finish_zone(struct scsi_cmnd *, struct sdebug_dev_info *);
@@ -788,6 +810,11 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = {
resp_report_zones, zone_in_iarr, /* ZONE_IN(16), REPORT ZONES) */
{16, 0x0 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xc7} },
+/* 31 */
+ {0, 0x0, 0x0, F_D_OUT | FF_MEDIA_IO,
+ resp_atomic_write, NULL, /* ATOMIC WRITE 16 */
+ {16, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} },
/* sentinel */
{0xff, 0, 0, 0, NULL, NULL, /* terminating element */
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
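
These two table updates wire ATOMIC WRITE(16) into scsi_debug's dispatch:
the opcode_ind_arr slot corresponds to opcode 0x9C (two positions before
SERVICE ACTION IN(16) at 0x9E), and the new opcode_info_arr entry routes it
to resp_atomic_write() as a data-out, media-access command
(F_D_OUT | FF_MEDIA_IO). A sketch of the resulting lookup:

	u8 opcode = scp->cmnd[0];			/* 0x9C */
	int idx = opcode_ind_arr[opcode];		/* SDEB_I_ATOMIC_WRITE_16 */
	const struct opcode_info_t *oip = &opcode_info_arr[idx];

	ret = oip->pfp(scp, devip);			/* resp_atomic_write() */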
@@ -835,6 +862,13 @@ static unsigned int sdebug_unmap_granularity = DEF_UNMAP_GRANULARITY;
static unsigned int sdebug_unmap_max_blocks = DEF_UNMAP_MAX_BLOCKS;
static unsigned int sdebug_unmap_max_desc = DEF_UNMAP_MAX_DESC;
static unsigned int sdebug_write_same_length = DEF_WRITESAME_LENGTH;
+static unsigned int sdebug_atomic_wr = DEF_ATOMIC_WR;
+static unsigned int sdebug_atomic_wr_max_length = DEF_ATOMIC_WR_MAX_LENGTH;
+static unsigned int sdebug_atomic_wr_align = DEF_ATOMIC_WR_ALIGN;
+static unsigned int sdebug_atomic_wr_gran = DEF_ATOMIC_WR_GRAN;
+static unsigned int sdebug_atomic_wr_max_length_bndry =
+ DEF_ATOMIC_WR_MAX_LENGTH_BNDRY;
+static unsigned int sdebug_atomic_wr_max_bndry = DEF_ATOMIC_WR_MAX_BNDRY;
static int sdebug_uuid_ctl = DEF_UUID_CTL;
static bool sdebug_random = DEF_RANDOM;
static bool sdebug_per_host_store = DEF_PER_HOST_STORE;
@@ -1192,6 +1226,11 @@ static inline bool scsi_debug_lbp(void)
(sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10);
}
+static inline bool scsi_debug_atomic_write(void)
+{
+ return sdebug_fake_rw == 0 && sdebug_atomic_wr;
+}
+
static void *lba2fake_store(struct sdeb_store_info *sip,
unsigned long long lba)
{
@@ -1819,6 +1858,14 @@ static int inquiry_vpd_b0(unsigned char *arr)
/* Maximum WRITE SAME Length */
put_unaligned_be64(sdebug_write_same_length, &arr[32]);
+ if (sdebug_atomic_wr) {
+ put_unaligned_be32(sdebug_atomic_wr_max_length, &arr[40]);
+ put_unaligned_be32(sdebug_atomic_wr_align, &arr[44]);
+ put_unaligned_be32(sdebug_atomic_wr_gran, &arr[48]);
+ put_unaligned_be32(sdebug_atomic_wr_max_length_bndry, &arr[52]);
+ put_unaligned_be32(sdebug_atomic_wr_max_bndry, &arr[56]);
+ }
+
return 0x3c; /* Mandatory page length for Logical Block Provisioning */
}
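
The new put_unaligned_be32() calls populate the atomic-write fields of the
Block Limits VPD page (B0h). Since arr[] starts after the 4-byte page
header, the offsets correspond to the SBC-4 fields as follows (field names
per the standard; a sketch, not additional code):

	&arr[40]  /* page bytes 44-47: MAXIMUM ATOMIC TRANSFER LENGTH */
	&arr[44]  /* page bytes 48-51: ATOMIC ALIGNMENT */
	&arr[48]  /* page bytes 52-55: ATOMIC TRANSFER LENGTH GRANULARITY */
	&arr[52]  /* page bytes 56-59: MAXIMUM ATOMIC TRANSFER LENGTH WITH ATOMIC BOUNDARY */
	&arr[56]  /* page bytes 60-63: MAXIMUM ATOMIC BOUNDARY SIZE */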
@@ -3381,16 +3428,238 @@ static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip,
return xa_load(per_store_ap, devip->sdbg_host->si_idx);
}
+static inline void
+sdeb_read_lock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __acquire(lock);
+ else
+ read_lock(lock);
+}
+
+static inline void
+sdeb_read_unlock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __release(lock);
+ else
+ read_unlock(lock);
+}
+
+static inline void
+sdeb_write_lock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __acquire(lock);
+ else
+ write_lock(lock);
+}
+
+static inline void
+sdeb_write_unlock(rwlock_t *lock)
+{
+ if (sdebug_no_rwlock)
+ __release(lock);
+ else
+ write_unlock(lock);
+}
+
+static inline void
+sdeb_data_read_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_lock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_read_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_unlock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_write_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_lock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_write_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_unlock(&sip->macc_data_lck);
+}
+
+static inline void
+sdeb_data_sector_read_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_lock(&sip->macc_sector_lck);
+}
+
+static inline void
+sdeb_data_sector_read_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_read_unlock(&sip->macc_sector_lck);
+}
+
+static inline void
+sdeb_data_sector_write_lock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_lock(&sip->macc_sector_lck);
+}
+
+static inline void
+sdeb_data_sector_write_unlock(struct sdeb_store_info *sip)
+{
+ BUG_ON(!sip);
+
+ sdeb_write_unlock(&sip->macc_sector_lck);
+}
+
+/*
+ * Atomic locking:
+ * We simplify the atomic model to allow only 1x atomic write and many non-
+ * atomic reads or writes for all LBAs.
+ *
+ * An RW lock has similar behaviour:
+ * only 1x writer and many readers.
+ *
+ * So use an RW lock for per-device read and write locking:
+ * An atomic access grabs the lock as a writer and non-atomic grabs the lock
+ * as a reader.
+ */
+
+static inline void
+sdeb_data_lock(struct sdeb_store_info *sip, bool atomic)
+{
+ if (atomic)
+ sdeb_data_write_lock(sip);
+ else
+ sdeb_data_read_lock(sip);
+}
+
+static inline void
+sdeb_data_unlock(struct sdeb_store_info *sip, bool atomic)
+{
+ if (atomic)
+ sdeb_data_write_unlock(sip);
+ else
+ sdeb_data_read_unlock(sip);
+}
+
+/* Allow many reads but only 1x write per sector */
+static inline void
+sdeb_data_sector_lock(struct sdeb_store_info *sip, bool do_write)
+{
+ if (do_write)
+ sdeb_data_sector_write_lock(sip);
+ else
+ sdeb_data_sector_read_lock(sip);
+}
+
+static inline void
+sdeb_data_sector_unlock(struct sdeb_store_info *sip, bool do_write)
+{
+ if (do_write)
+ sdeb_data_sector_write_unlock(sip);
+ else
+ sdeb_data_sector_read_unlock(sip);
+}
+
+static inline void
+sdeb_meta_read_lock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __acquire(&sip->macc_meta_lck);
+ else
+ __acquire(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ read_lock(&sip->macc_meta_lck);
+ else
+ read_lock(&sdeb_fake_rw_lck);
+ }
+}
+
+static inline void
+sdeb_meta_read_unlock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __release(&sip->macc_meta_lck);
+ else
+ __release(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ read_unlock(&sip->macc_meta_lck);
+ else
+ read_unlock(&sdeb_fake_rw_lck);
+ }
+}
+
+static inline void
+sdeb_meta_write_lock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __acquire(&sip->macc_meta_lck);
+ else
+ __acquire(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ write_lock(&sip->macc_meta_lck);
+ else
+ write_lock(&sdeb_fake_rw_lck);
+ }
+}
+
+static inline void
+sdeb_meta_write_unlock(struct sdeb_store_info *sip)
+{
+ if (sdebug_no_rwlock) {
+ if (sip)
+ __release(&sip->macc_meta_lck);
+ else
+ __release(&sdeb_fake_rw_lck);
+ } else {
+ if (sip)
+ write_unlock(&sip->macc_meta_lck);
+ else
+ write_unlock(&sdeb_fake_rw_lck);
+ }
+}
+
/* Returns number of bytes copied or -1 if error. */
static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp,
- u32 sg_skip, u64 lba, u32 num, bool do_write,
- u8 group_number)
+ u32 sg_skip, u64 lba, u32 num, u8 group_number,
+ bool do_write, bool atomic)
{
int ret;
- u64 block, rest = 0;
+ u64 block;
enum dma_data_direction dir;
struct scsi_data_buffer *sdb = &scp->sdb;
u8 *fsp;
+ int i;
+
+ /*
+ * Even though reads are inherently atomic (in this driver), we expect
+ * the atomic flag only for writes.
+ */
+ if (!do_write && atomic)
+ return -1;
if (do_write) {
dir = DMA_TO_DEVICE;
@@ -3410,21 +3679,26 @@ static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp,
fsp = sip->storep;
block = do_div(lba, sdebug_store_sectors);
- if (block + num > sdebug_store_sectors)
- rest = block + num - sdebug_store_sectors;
- ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents,
+ /* Only allow 1x atomic write or multiple non-atomic writes at any given time */
+ sdeb_data_lock(sip, atomic);
+ for (i = 0; i < num; i++) {
+ /* We shouldn't need to lock for atomic writes, but do it anyway */
+ sdeb_data_sector_lock(sip, do_write);
+ ret = sg_copy_buffer(sdb->table.sgl, sdb->table.nents,
fsp + (block * sdebug_sector_size),
- (num - rest) * sdebug_sector_size, sg_skip, do_write);
- if (ret != (num - rest) * sdebug_sector_size)
- return ret;
-
- if (rest) {
- ret += sg_copy_buffer(sdb->table.sgl, sdb->table.nents,
- fsp, rest * sdebug_sector_size,
- sg_skip + ((num - rest) * sdebug_sector_size),
- do_write);
+ sdebug_sector_size, sg_skip, do_write);
+ sdeb_data_sector_unlock(sip, do_write);
+ if (ret != sdebug_sector_size) {
+ ret += (i * sdebug_sector_size);
+ break;
+ }
+ sg_skip += sdebug_sector_size;
+ if (++block >= sdebug_store_sectors)
+ block = 0;
}
+	if (i == num)
+		ret = num * sdebug_sector_size;
+ sdeb_data_unlock(sip, atomic);
return ret;
}
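
Together with the helpers above, the reworked do_device_access() implements
the two-level scheme the "Atomic locking" comment describes: the per-store
data lock serializes an atomic write against all other I/O, while the
per-sector lock orders the individual sector copies of concurrent
non-atomic commands. As a sketch of the exclusion:

	/* ATOMIC WRITE(16): sole writer for the whole range */
	sdeb_data_lock(sip, true);	/* write_lock(&sip->macc_data_lck) */
	/* ... copy every sector ... */
	sdeb_data_unlock(sip, true);

	/* ordinary READ/WRITE: many may run concurrently ... */
	sdeb_data_lock(sip, false);	/* read_lock(&sip->macc_data_lck) */
	/* ... but each sector copy is still individually serialized: */
	sdeb_data_sector_lock(sip, do_write);
	/* ... copy one sector ... */
	sdeb_data_sector_unlock(sip, do_write);
	sdeb_data_unlock(sip, false);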
@@ -3600,70 +3874,6 @@ static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec,
return ret;
}
-static inline void
-sdeb_read_lock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __acquire(&sip->macc_lck);
- else
- __acquire(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- read_lock(&sip->macc_lck);
- else
- read_lock(&sdeb_fake_rw_lck);
- }
-}
-
-static inline void
-sdeb_read_unlock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __release(&sip->macc_lck);
- else
- __release(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- read_unlock(&sip->macc_lck);
- else
- read_unlock(&sdeb_fake_rw_lck);
- }
-}
-
-static inline void
-sdeb_write_lock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __acquire(&sip->macc_lck);
- else
- __acquire(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- write_lock(&sip->macc_lck);
- else
- write_lock(&sdeb_fake_rw_lck);
- }
-}
-
-static inline void
-sdeb_write_unlock(struct sdeb_store_info *sip)
-{
- if (sdebug_no_rwlock) {
- if (sip)
- __release(&sip->macc_lck);
- else
- __release(&sdeb_fake_rw_lck);
- } else {
- if (sip)
- write_unlock(&sip->macc_lck);
- else
- write_unlock(&sdeb_fake_rw_lck);
- }
-}
-
static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
{
bool check_prot;
@@ -3673,6 +3883,7 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
u64 lba;
struct sdeb_store_info *sip = devip2sip(devip, true);
u8 *cmd = scp->cmnd;
+ bool meta_data_locked = false;
switch (cmd[0]) {
case READ_16:
@@ -3731,6 +3942,10 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
atomic_set(&sdeb_inject_pending, 0);
}
+ /*
+ * When checking device access params, for reads we only check data
+ * versus what is set at init time, so no need to lock.
+ */
ret = check_device_access_params(scp, lba, num, false);
if (ret)
return ret;
@@ -3750,29 +3965,33 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return check_condition_result;
}
- sdeb_read_lock(sip);
+ if (sdebug_dev_is_zoned(devip) ||
+ (sdebug_dix && scsi_prot_sg_count(scp))) {
+ sdeb_meta_read_lock(sip);
+ meta_data_locked = true;
+ }
/* DIX + T10 DIF */
if (unlikely(sdebug_dix && scsi_prot_sg_count(scp))) {
switch (prot_verify_read(scp, lba, num, ei_lba)) {
case 1: /* Guard tag error */
if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1);
return check_condition_result;
} else if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) {
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1);
return illegal_condition_result;
}
break;
case 3: /* Reference tag error */
if (cmd[1] >> 5 != 3) { /* RDPROTECT != 3 */
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3);
return check_condition_result;
} else if (scp->prot_flags & SCSI_PROT_REF_CHECK) {
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3);
return illegal_condition_result;
}
@@ -3780,8 +3999,9 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
}
- ret = do_device_access(sip, scp, 0, lba, num, false, 0);
- sdeb_read_unlock(sip);
+ ret = do_device_access(sip, scp, 0, lba, num, 0, false, false);
+ if (meta_data_locked)
+ sdeb_meta_read_unlock(sip);
if (unlikely(ret == -1))
return DID_ERROR << 16;
@@ -3971,6 +4191,7 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
u64 lba;
struct sdeb_store_info *sip = devip2sip(devip, true);
u8 *cmd = scp->cmnd;
+ bool meta_data_locked = false;
switch (cmd[0]) {
case WRITE_16:
@@ -4029,10 +4250,17 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
"to DIF device\n");
}
- sdeb_write_lock(sip);
+ if (sdebug_dev_is_zoned(devip) ||
+ (sdebug_dix && scsi_prot_sg_count(scp)) ||
+ scsi_debug_lbp()) {
+ sdeb_meta_write_lock(sip);
+ meta_data_locked = true;
+ }
+
ret = check_device_access_params(scp, lba, num, true);
if (ret) {
- sdeb_write_unlock(sip);
+ if (meta_data_locked)
+ sdeb_meta_write_unlock(sip);
return ret;
}
@@ -4041,22 +4269,22 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
switch (prot_verify_write(scp, lba, num, ei_lba)) {
case 1: /* Guard tag error */
if (scp->prot_flags & SCSI_PROT_GUARD_CHECK) {
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 1);
return illegal_condition_result;
} else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 1);
return check_condition_result;
}
break;
case 3: /* Reference tag error */
if (scp->prot_flags & SCSI_PROT_REF_CHECK) {
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ILLEGAL_REQUEST, 0x10, 3);
return illegal_condition_result;
} else if (scp->cmnd[1] >> 5 != 3) { /* WRPROTECT != 3 */
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
mk_sense_buffer(scp, ABORTED_COMMAND, 0x10, 3);
return check_condition_result;
}
@@ -4064,13 +4292,16 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
}
- ret = do_device_access(sip, scp, 0, lba, num, true, group);
+ ret = do_device_access(sip, scp, 0, lba, num, group, true, false);
if (unlikely(scsi_debug_lbp()))
map_region(sip, lba, num);
+
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
- sdeb_write_unlock(sip);
+ if (meta_data_locked)
+ sdeb_meta_write_unlock(sip);
+
if (unlikely(-1 == ret))
return DID_ERROR << 16;
else if (unlikely(sdebug_verbose &&
@@ -4180,7 +4411,8 @@ static int resp_write_scat(struct scsi_cmnd *scp,
goto err_out;
}
- sdeb_write_lock(sip);
+ /* Just keep it simple and always lock for now */
+ sdeb_meta_write_lock(sip);
sg_off = lbdof_blen;
/* Spec says Buffer xfer Length field in number of LBs in dout */
cum_lb = 0;
@@ -4223,7 +4455,11 @@ static int resp_write_scat(struct scsi_cmnd *scp,
}
}
- ret = do_device_access(sip, scp, sg_off, lba, num, true, group);
+ /*
+	 * Write ranges atomically to stay as close as possible to the
+	 * behaviour before atomic writes were introduced.
+ */
+ ret = do_device_access(sip, scp, sg_off, lba, num, group, true, true);
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
@@ -4262,7 +4498,7 @@ static int resp_write_scat(struct scsi_cmnd *scp,
}
ret = 0;
err_out_unlock:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
err_out:
kfree(lrdp);
return ret;
@@ -4281,14 +4517,16 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
scp->device->hostdata, true);
u8 *fs1p;
u8 *fsp;
+ bool meta_data_locked = false;
- sdeb_write_lock(sip);
+ if (sdebug_dev_is_zoned(devip) || scsi_debug_lbp()) {
+ sdeb_meta_write_lock(sip);
+ meta_data_locked = true;
+ }
ret = check_device_access_params(scp, lba, num, true);
- if (ret) {
- sdeb_write_unlock(sip);
- return ret;
- }
+ if (ret)
+ goto out;
if (unmap && scsi_debug_lbp()) {
unmap_region(sip, lba, num);
@@ -4299,6 +4537,7 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
/* if ndob then zero 1 logical block, else fetch 1 logical block */
fsp = sip->storep;
fs1p = fsp + (block * lb_size);
+ sdeb_data_write_lock(sip);
if (ndob) {
memset(fs1p, 0, lb_size);
ret = 0;
@@ -4306,8 +4545,8 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
ret = fetch_to_dev_buffer(scp, fs1p, lb_size);
if (-1 == ret) {
- sdeb_write_unlock(sip);
- return DID_ERROR << 16;
+ ret = DID_ERROR << 16;
+ goto out;
} else if (sdebug_verbose && !ndob && (ret < lb_size))
sdev_printk(KERN_INFO, scp->device,
"%s: %s: lb size=%u, IO sent=%d bytes\n",
@@ -4324,10 +4563,12 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
+ sdeb_data_write_unlock(sip);
+ ret = 0;
out:
- sdeb_write_unlock(sip);
-
- return 0;
+ if (meta_data_locked)
+ sdeb_meta_write_unlock(sip);
+ return ret;
}
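/*
 * A minimal sketch of the locking discipline the hunks above converge on,
 * using the sdeb_*_lock() helpers this series introduces; write_blocks()
 * is a hypothetical stand-in for the actual copy into sip->storep. The
 * meta write lock is only taken when provisioning or zone state can
 * change; the data write lock is held just around the store update.
 */
static void sdeb_locking_sketch(struct sdeb_store_info *sip, bool touches_meta)
{
	if (touches_meta)
		sdeb_meta_write_lock(sip);
	/* ... validate the request, update zone/provisioning state ... */
	sdeb_data_write_lock(sip);
	/* write_blocks(sip); -- hypothetical data copy */
	sdeb_data_write_unlock(sip);
	if (touches_meta)
		sdeb_meta_write_unlock(sip);
}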
static int resp_write_same_10(struct scsi_cmnd *scp,
@@ -4470,25 +4711,30 @@ static int resp_comp_write(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_write_lock(sip);
-
ret = do_dout_fetch(scp, dnum, arr);
if (ret == -1) {
retval = DID_ERROR << 16;
- goto cleanup;
+ goto cleanup_free;
} else if (sdebug_verbose && (ret < (dnum * lb_size)))
sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb "
"indicated=%u, IO sent=%d bytes\n", my_name,
dnum * lb_size, ret);
+
+ sdeb_data_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (!comp_write_worker(sip, lba, num, arr, false)) {
mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0);
retval = check_condition_result;
- goto cleanup;
+ goto cleanup_unlock;
}
+
+ /* Cover sip->map_storep (which map_region() sets) with the data lock */
if (scsi_debug_lbp())
map_region(sip, lba, num);
-cleanup:
- sdeb_write_unlock(sip);
+cleanup_unlock:
+ sdeb_meta_write_unlock(sip);
+ sdeb_data_write_unlock(sip);
+cleanup_free:
kfree(arr);
return retval;
}
@@ -4532,7 +4778,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
desc = (void *)&buf[8];
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
for (i = 0 ; i < descriptors ; i++) {
unsigned long long lba = get_unaligned_be64(&desc[i].lba);
@@ -4548,7 +4794,7 @@ static int resp_unmap(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
ret = 0;
out:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
kfree(buf);
return ret;
@@ -4706,12 +4952,13 @@ static int resp_pre_fetch(struct scsi_cmnd *scp,
rest = block + nblks - sdebug_store_sectors;
/* Try to bring the PRE-FETCH range into CPU's cache */
- sdeb_read_lock(sip);
+ sdeb_data_read_lock(sip);
prefetch_range(fsp + (sdebug_sector_size * block),
(nblks - rest) * sdebug_sector_size);
if (rest)
prefetch_range(fsp, rest * sdebug_sector_size);
- sdeb_read_unlock(sip);
+
+ sdeb_data_read_unlock(sip);
fini:
if (cmd[1] & 0x2)
res = SDEG_RES_IMMED_MASK;
@@ -4870,7 +5117,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return check_condition_result;
}
/* Not changing store, so only need read access */
- sdeb_read_lock(sip);
+ sdeb_data_read_lock(sip);
ret = do_dout_fetch(scp, a_num, arr);
if (ret == -1) {
@@ -4892,7 +5139,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
goto cleanup;
}
cleanup:
- sdeb_read_unlock(sip);
+ sdeb_data_read_unlock(sip);
kfree(arr);
return ret;
}
@@ -4938,7 +5185,7 @@ static int resp_report_zones(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_read_lock(sip);
+ sdeb_meta_read_lock(sip);
desc = arr + 64;
for (lba = zs_lba; lba < sdebug_capacity;
@@ -5036,11 +5283,70 @@ static int resp_report_zones(struct scsi_cmnd *scp,
ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len));
fini:
- sdeb_read_unlock(sip);
+ sdeb_meta_read_unlock(sip);
kfree(arr);
return ret;
}
+static int resp_atomic_write(struct scsi_cmnd *scp,
+ struct sdebug_dev_info *devip)
+{
+ struct sdeb_store_info *sip;
+ u8 *cmd = scp->cmnd;
+ u16 boundary, len;
+ u64 lba, lba_tmp;
+ int ret;
+
+ if (!scsi_debug_atomic_write()) {
+ mk_sense_invalid_opcode(scp);
+ return check_condition_result;
+ }
+
+ sip = devip2sip(devip, true);
+
+ lba = get_unaligned_be64(cmd + 2);
+ boundary = get_unaligned_be16(cmd + 10);
+ len = get_unaligned_be16(cmd + 12);
+
+ lba_tmp = lba;
+ if (sdebug_atomic_wr_align &&
+ do_div(lba_tmp, sdebug_atomic_wr_align)) {
+ /* Does not meet alignment requirement */
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ return check_condition_result;
+ }
+
+ if (sdebug_atomic_wr_gran && len % sdebug_atomic_wr_gran) {
+ /* Does not meet granularity requirement */
+ mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ return check_condition_result;
+ }
+
+ if (boundary > 0) {
+ if (boundary > sdebug_atomic_wr_max_bndry) {
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1);
+ return check_condition_result;
+ }
+
+ if (len > sdebug_atomic_wr_max_length_bndry) {
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1);
+ return check_condition_result;
+ }
+ } else {
+ if (len > sdebug_atomic_wr_max_length) {
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 12, -1);
+ return check_condition_result;
+ }
+ }
+
+ ret = do_device_access(sip, scp, 0, lba, len, 0, true, true);
+ if (unlikely(ret == -1))
+ return DID_ERROR << 16;
+ if (unlikely(ret != len * sdebug_sector_size))
+ return DID_ERROR << 16;
+ return 0;
+}
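/*
 * do_div() divides its first argument in place and returns the
 * remainder, which is why resp_atomic_write() works on the lba_tmp copy
 * above. The same check as a hypothetical standalone helper:
 */
static bool sdeb_lba_is_aligned(u64 lba, u32 align)
{
	/* lba is passed by value, so the in-place division is harmless */
	return !align || do_div(lba, align) == 0;
}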
+
/* Logic transplanted from tcmu-runner, file_zbc.c */
static void zbc_open_all(struct sdebug_dev_info *devip)
{
@@ -5067,8 +5373,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
mk_sense_invalid_opcode(scp);
return check_condition_result;
}
-
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
/* Check if all closed zones can be open */
@@ -5117,7 +5422,7 @@ static int resp_open_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
zbc_open_zone(devip, zsp, true);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -5144,7 +5449,7 @@ static int resp_close_zone(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
zbc_close_all(devip);
@@ -5173,7 +5478,7 @@ static int resp_close_zone(struct scsi_cmnd *scp,
zbc_close_zone(devip, zsp);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -5216,7 +5521,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp,
return check_condition_result;
}
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
zbc_finish_all(devip);
@@ -5245,7 +5550,7 @@ static int resp_finish_zone(struct scsi_cmnd *scp,
zbc_finish_zone(devip, zsp, true);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -5296,7 +5601,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
return check_condition_result;
}
- sdeb_write_lock(sip);
+ sdeb_meta_write_lock(sip);
if (all) {
zbc_rwp_all(devip);
@@ -5324,7 +5629,7 @@ static int resp_rwp_zone(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
zbc_rwp_zone(devip, zsp);
fini:
- sdeb_write_unlock(sip);
+ sdeb_meta_write_unlock(sip);
return res;
}
@@ -6288,6 +6593,7 @@ module_param_named(lbprz, sdebug_lbprz, int, S_IRUGO);
module_param_named(lbpu, sdebug_lbpu, int, S_IRUGO);
module_param_named(lbpws, sdebug_lbpws, int, S_IRUGO);
module_param_named(lbpws10, sdebug_lbpws10, int, S_IRUGO);
+module_param_named(atomic_wr, sdebug_atomic_wr, int, S_IRUGO);
module_param_named(lowest_aligned, sdebug_lowest_aligned, int, S_IRUGO);
module_param_named(lun_format, sdebug_lun_am_i, int, S_IRUGO | S_IWUSR);
module_param_named(max_luns, sdebug_max_luns, int, S_IRUGO | S_IWUSR);
@@ -6322,6 +6628,11 @@ module_param_named(unmap_alignment, sdebug_unmap_alignment, int, S_IRUGO);
module_param_named(unmap_granularity, sdebug_unmap_granularity, int, S_IRUGO);
module_param_named(unmap_max_blocks, sdebug_unmap_max_blocks, int, S_IRUGO);
module_param_named(unmap_max_desc, sdebug_unmap_max_desc, int, S_IRUGO);
+module_param_named(atomic_wr_max_length, sdebug_atomic_wr_max_length, int, S_IRUGO);
+module_param_named(atomic_wr_align, sdebug_atomic_wr_align, int, S_IRUGO);
+module_param_named(atomic_wr_gran, sdebug_atomic_wr_gran, int, S_IRUGO);
+module_param_named(atomic_wr_max_length_bndry, sdebug_atomic_wr_max_length_bndry, int, S_IRUGO);
+module_param_named(atomic_wr_max_bndry, sdebug_atomic_wr_max_bndry, int, S_IRUGO);
module_param_named(uuid_ctl, sdebug_uuid_ctl, int, S_IRUGO);
module_param_named(virtual_gb, sdebug_virtual_gb, int, S_IRUGO | S_IWUSR);
module_param_named(vpd_use_hostno, sdebug_vpd_use_hostno, int,
@@ -6365,6 +6676,7 @@ MODULE_PARM_DESC(lbprz,
MODULE_PARM_DESC(lbpu, "enable LBP, support UNMAP command (def=0)");
MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)");
+MODULE_PARM_DESC(atomic_wr, "enable ATOMIC WRITE support via WRITE ATOMIC(16) (def=0)");
MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)");
MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method");
MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
@@ -6396,6 +6708,11 @@ MODULE_PARM_DESC(unmap_alignment, "lowest aligned thin provisioning lba (def=0)"
MODULE_PARM_DESC(unmap_granularity, "thin provisioning granularity in blocks (def=1)");
MODULE_PARM_DESC(unmap_max_blocks, "max # of blocks can be unmapped in one cmd (def=0xffffffff)");
MODULE_PARM_DESC(unmap_max_desc, "max # of ranges that can be unmapped in one cmd (def=256)");
+MODULE_PARM_DESC(atomic_wr_max_length, "max # of blocks can be atomically written in one cmd (def=8192)");
+MODULE_PARM_DESC(atomic_wr_align, "minimum alignment of atomic write in blocks (def=2)");
+MODULE_PARM_DESC(atomic_wr_gran, "minimum granularity of atomic write in blocks (def=2)");
+MODULE_PARM_DESC(atomic_wr_max_length_bndry, "max # of blocks can be atomically written in one cmd with boundary set (def=8192)");
+MODULE_PARM_DESC(atomic_wr_max_bndry, "max # boundaries per atomic write (def=128)");
MODULE_PARM_DESC(uuid_ctl,
"1->use uuid for lu name, 0->don't, 2->all use same (def=0)");
MODULE_PARM_DESC(virtual_gb, "virtual gigabyte (GiB) size (def=0 -> use dev_size_mb)");
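/*
 * Example invocation (values are illustrative only), wiring up the new
 * atomic write knobs declared above:
 *
 *   modprobe scsi_debug atomic_wr=1 atomic_wr_align=2 atomic_wr_gran=2 \
 *            atomic_wr_max_length=8192 atomic_wr_max_bndry=128
 */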
@@ -7567,6 +7884,7 @@ static int __init scsi_debug_init(void)
return -EINVAL;
}
}
+
xa_init_flags(per_store_ap, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
if (want_store) {
idx = sdebug_add_store();
@@ -7774,7 +8092,9 @@ static int sdebug_add_store(void)
map_region(sip, 0, 2);
}
- rwlock_init(&sip->macc_lck);
+ rwlock_init(&sip->macc_data_lck);
+ rwlock_init(&sip->macc_meta_lck);
+ rwlock_init(&sip->macc_sector_lck);
return (int)n_idx;
err:
sdebug_erase_store((int)n_idx, sip);
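/*
 * Reading of the split above: the former single macc_lck becomes three
 * locks, where macc_data_lck covers the backing store contents,
 * macc_meta_lck covers provisioning and zone metadata, and
 * macc_sector_lck serializes individual sectors so that writes issued
 * through do_device_access() with its new atomic argument are observed
 * whole by concurrent readers (an inference from these hunks, not
 * stated in the patch itself).
 */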
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ec39acc986d6..3958a6d14bf4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -631,8 +631,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
if (blk_update_request(req, error, bytes))
return true;
- // XXX:
- if (blk_queue_add_random(q))
+ if (q->limits.features & BLK_FEAT_ADD_RANDOM)
add_disk_randomness(req->q->disk);
WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
@@ -1140,9 +1139,9 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
*/
count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
- if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
+ if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) {
unsigned int pad_len =
- (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+ (rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
last_sg->length += pad_len;
cmd->extra_len += pad_len;
@@ -1987,7 +1986,7 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim)
shost->dma_alignment, dma_get_cache_alignment() - 1);
if (shost->no_highmem)
- lim->bounce = BLK_BOUNCE_HIGH;
+ lim->features |= BLK_FEAT_BOUNCE_HIGH;
dma_set_seg_boundary(dev, shost->dma_boundary);
dma_set_max_seg_size(dev, shost->max_segment_size);
diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c
index 41a950075913..3e47c4472a80 100644
--- a/drivers/scsi/scsi_trace.c
+++ b/drivers/scsi/scsi_trace.c
@@ -326,6 +326,26 @@ out:
}
static const char *
+scsi_trace_atomic_write16_out(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned int boundary_size;
+ unsigned int nr_blocks;
+ sector_t lba;
+
+ lba = get_unaligned_be64(&cdb[2]);
+ boundary_size = get_unaligned_be16(&cdb[10]);
+ nr_blocks = get_unaligned_be16(&cdb[12]);
+
+ trace_seq_printf(p, "lba=%llu txlen=%u boundary_size=%u",
+ lba, nr_blocks, boundary_size);
+
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
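/*
 * With the format string above, a WRITE ATOMIC(16) to LBA 2048 for 16
 * blocks and no boundary would be rendered in the trace as:
 *
 *   lba=2048 txlen=16 boundary_size=0
 */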
+
+static const char *
scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
{
switch (SERVICE_ACTION32(cdb)) {
@@ -385,6 +405,8 @@ scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
return scsi_trace_zbc_in(p, cdb, len);
case ZBC_OUT:
return scsi_trace_zbc_out(p, cdb, len);
+ case WRITE_ATOMIC_16:
+ return scsi_trace_atomic_write16_out(p, cdb, len);
default:
return scsi_trace_misc(p, cdb, len);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1b7561abe05d..2e933fd1de70 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -102,12 +102,13 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC);
#define SD_MINORS 16
-static void sd_config_discard(struct scsi_disk *, unsigned int);
-static void sd_config_write_same(struct scsi_disk *);
+static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
+ unsigned int mode);
+static void sd_config_write_same(struct scsi_disk *sdkp,
+ struct queue_limits *lim);
static int sd_revalidate_disk(struct gendisk *);
static void sd_unlock_native_capacity(struct gendisk *disk);
static void sd_shutdown(struct device *);
-static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
static void scsi_disk_release(struct device *cdev);
static DEFINE_IDA(sd_index_ida);
@@ -120,17 +121,18 @@ static const char *sd_cache_types[] = {
"write back, no read (daft)"
};
-static void sd_set_flush_flag(struct scsi_disk *sdkp)
+static void sd_set_flush_flag(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
- bool wc = false, fua = false;
-
if (sdkp->WCE) {
- wc = true;
+ lim->features |= BLK_FEAT_WRITE_CACHE;
if (sdkp->DPOFUA)
- fua = true;
+ lim->features |= BLK_FEAT_FUA;
+ else
+ lim->features &= ~BLK_FEAT_FUA;
+ } else {
+ lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
}
-
- blk_queue_write_cache(sdkp->disk->queue, wc, fua);
}
static ssize_t
@@ -168,9 +170,18 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0;
if (sdkp->cache_override) {
+ struct queue_limits lim;
+
sdkp->WCE = wce;
sdkp->RCD = rcd;
- sd_set_flush_flag(sdkp);
+
+ lim = queue_limits_start_update(sdkp->disk->queue);
+ sd_set_flush_flag(sdkp, &lim);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ ret = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (ret)
+ return ret;
return count;
}
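/*
 * The update pattern used in this hunk (and repeated throughout the
 * series) as a minimal self-contained sketch; q is a hypothetical
 * request_queue and the limit chosen is arbitrary:
 */
static int example_set_logical_block_size(struct request_queue *q,
					  unsigned int lbs)
{
	struct queue_limits lim;
	int err;

	lim = queue_limits_start_update(q);
	lim.logical_block_size = lbs;
	blk_mq_freeze_queue(q);
	err = queue_limits_commit_update(q, &lim);
	blk_mq_unfreeze_queue(q);
	return err;
}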
@@ -457,16 +468,12 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
- int mode;
+ struct queue_limits lim;
+ int mode, err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (sd_is_zoned(sdkp)) {
- sd_config_discard(sdkp, SD_LBP_DISABLE);
- return count;
- }
-
if (sdp->type != TYPE_DISK)
return -EINVAL;
@@ -474,8 +481,13 @@ provisioning_mode_store(struct device *dev, struct device_attribute *attr,
if (mode < 0)
return -EINVAL;
- sd_config_discard(sdkp, mode);
-
+ lim = queue_limits_start_update(sdkp->disk->queue);
+ sd_config_discard(sdkp, &lim, mode);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ err = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (err)
+ return err;
return count;
}
static DEVICE_ATTR_RW(provisioning_mode);
@@ -558,6 +570,7 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
+ struct queue_limits lim;
unsigned long max;
int err;
@@ -579,8 +592,13 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
sdkp->max_ws_blocks = max;
}
- sd_config_write_same(sdkp);
-
+ lim = queue_limits_start_update(sdkp->disk->queue);
+ sd_config_write_same(sdkp, &lim);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ err = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (err)
+ return err;
return count;
}
static DEVICE_ATTR_RW(max_write_same_blocks);
@@ -823,25 +841,28 @@ static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
return protect;
}
-static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
+static void sd_disable_discard(struct scsi_disk *sdkp)
+{
+ sdkp->provisioning_mode = SD_LBP_DISABLE;
+ blk_queue_disable_discard(sdkp->disk->queue);
+}
+
+static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
+ unsigned int mode)
{
- struct request_queue *q = sdkp->disk->queue;
unsigned int logical_block_size = sdkp->device->sector_size;
unsigned int max_blocks = 0;
- q->limits.discard_alignment =
- sdkp->unmap_alignment * logical_block_size;
- q->limits.discard_granularity =
- max(sdkp->physical_block_size,
- sdkp->unmap_granularity * logical_block_size);
+ lim->discard_alignment = sdkp->unmap_alignment * logical_block_size;
+ lim->discard_granularity = max(sdkp->physical_block_size,
+ sdkp->unmap_granularity * logical_block_size);
sdkp->provisioning_mode = mode;
switch (mode) {
case SD_LBP_FULL:
case SD_LBP_DISABLE:
- blk_queue_max_discard_sectors(q, 0);
- return;
+ break;
case SD_LBP_UNMAP:
max_blocks = min_not_zero(sdkp->max_unmap_blocks,
@@ -872,7 +893,8 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
break;
}
- blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
+ lim->max_hw_discard_sectors = max_blocks *
+ (logical_block_size >> SECTOR_SHIFT);
}
static void *sd_set_special_bvec(struct request *rq, unsigned int data_len)
@@ -918,6 +940,64 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
return scsi_alloc_sgtables(cmd);
}
+static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim)
+{
+ unsigned int logical_block_size = sdkp->device->sector_size,
+ physical_block_size_sectors, max_atomic, unit_min, unit_max;
+
+ if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) ||
+ sdkp->protection_type == T10_PI_TYPE2_PROTECTION)
+ return;
+
+ physical_block_size_sectors = sdkp->physical_block_size /
+ sdkp->device->sector_size;
+
+ unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ?
+ sdkp->atomic_granularity :
+ physical_block_size_sectors);
+
+ /*
+ * Only use atomic boundary when we have the odd scenario of
+ * sdkp->max_atomic == 0, which the spec does permit.
+ */
+ if (sdkp->max_atomic) {
+ max_atomic = sdkp->max_atomic;
+ unit_max = rounddown_pow_of_two(sdkp->max_atomic);
+ sdkp->use_atomic_write_boundary = 0;
+ } else {
+ max_atomic = sdkp->max_atomic_with_boundary;
+ unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary);
+ sdkp->use_atomic_write_boundary = 1;
+ }
+
+ /*
+ * Ensure compliance with granularity and alignment. For now, keep it
+ * simple and just don't support atomic writes for values mismatched
+ * with max_atomic / max_atomic_with_boundary, the physical block
+ * size, and atomic_granularity itself.
+ *
+ * We're really being distrustful by checking unit_max also...
+ */
+ if (sdkp->atomic_granularity > 1) {
+ if (unit_min > 1 && unit_min % sdkp->atomic_granularity)
+ return;
+ if (unit_max > 1 && unit_max % sdkp->atomic_granularity)
+ return;
+ }
+
+ if (sdkp->atomic_alignment > 1) {
+ if (unit_min > 1 && unit_min % sdkp->atomic_alignment)
+ return;
+ if (unit_max > 1 && unit_max % sdkp->atomic_alignment)
+ return;
+ }
+
+ lim->atomic_write_hw_max = max_atomic * logical_block_size;
+ lim->atomic_write_hw_boundary = 0;
+ lim->atomic_write_hw_unit_min = unit_min * logical_block_size;
+ lim->atomic_write_hw_unit_max = unit_max * logical_block_size;
+}
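/*
 * Worked example of the checks above (hypothetical device): with
 * atomic_granularity = 6 blocks, rounddown_pow_of_two(6) = 4, so
 * unit_min = 4; since 4 % 6 != 0 the function returns early and no
 * atomic_write_hw_* limits are set, i.e. atomic writes are simply not
 * advertised for values that cannot be made power-of-two aligned.
 */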
+
static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
bool unmap)
{
@@ -1000,9 +1080,16 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
return sd_setup_write_same10_cmnd(cmd, false);
}
-static void sd_config_write_same(struct scsi_disk *sdkp)
+static void sd_disable_write_same(struct scsi_disk *sdkp)
+{
+ sdkp->device->no_write_same = 1;
+ sdkp->max_ws_blocks = 0;
+ blk_queue_disable_write_zeroes(sdkp->disk->queue);
+}
+
+static void sd_config_write_same(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
- struct request_queue *q = sdkp->disk->queue;
unsigned int logical_block_size = sdkp->device->sector_size;
if (sdkp->device->no_write_same) {
@@ -1056,8 +1143,8 @@ static void sd_config_write_same(struct scsi_disk *sdkp)
}
out:
- blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
- (logical_block_size >> 9));
+ lim->max_write_zeroes_sectors =
+ sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT);
}
static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
@@ -1209,6 +1296,26 @@ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd)
return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1;
}
+static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd,
+ sector_t lba, unsigned int nr_blocks,
+ bool boundary, unsigned char flags)
+{
+ cmd->cmd_len = 16;
+ cmd->cmnd[0] = WRITE_ATOMIC_16;
+ cmd->cmnd[1] = flags;
+ put_unaligned_be64(lba, &cmd->cmnd[2]);
+ if (boundary)
+ put_unaligned_be16(nr_blocks, &cmd->cmnd[10]);
+ else
+ put_unaligned_be16(0, &cmd->cmnd[10]);
+ put_unaligned_be16(nr_blocks, &cmd->cmnd[12]);
+ cmd->cmnd[14] = 0;
+ cmd->cmnd[15] = 0;
+
+ return BLK_STS_OK;
+}
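/*
 * Resulting WRITE ATOMIC(16) CDB layout, as built by the code above:
 *
 *   byte  0      WRITE_ATOMIC_16 opcode
 *   byte  1      protect/FUA flags
 *   bytes 2-9    LBA (big endian)
 *   bytes 10-11  atomic boundary (0 when boundaries are unused)
 *   bytes 12-13  transfer length in logical blocks
 *   bytes 14-15  zeroed (byte 15 is the CONTROL byte)
 */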
+
static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
{
struct request *rq = scsi_cmd_to_rq(cmd);
@@ -1274,6 +1381,10 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
+ } else if (rq->cmd_flags & REQ_ATOMIC && write) {
+ ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks,
+ sdkp->use_atomic_write_boundary,
+ protect | fua);
} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
@@ -2247,15 +2358,14 @@ static int sd_done(struct scsi_cmnd *SCpnt)
case 0x24: /* INVALID FIELD IN CDB */
switch (SCpnt->cmnd[0]) {
case UNMAP:
- sd_config_discard(sdkp, SD_LBP_DISABLE);
+ sd_disable_discard(sdkp);
break;
case WRITE_SAME_16:
case WRITE_SAME:
if (SCpnt->cmnd[1] & 8) { /* UNMAP */
- sd_config_discard(sdkp, SD_LBP_DISABLE);
+ sd_disable_discard(sdkp);
} else {
- sdkp->device->no_write_same = 1;
- sd_config_write_same(sdkp);
+ sd_disable_write_same(sdkp);
req->rq_flags |= RQF_QUIET;
}
break;
@@ -2267,7 +2377,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
}
out:
- if (sd_is_zoned(sdkp))
+ if (sdkp->device->type == TYPE_ZBC)
good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr);
SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
@@ -2461,11 +2571,13 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer
return 0;
}
-static void sd_config_protection(struct scsi_disk *sdkp)
+static void sd_config_protection(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
struct scsi_device *sdp = sdkp->device;
- sd_dif_config_host(sdkp);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ sd_dif_config_host(sdkp, lim);
if (!sdkp->protection_type)
return;
@@ -2514,7 +2626,7 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
#define READ_CAPACITY_RETRIES_ON_RESET 10
static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
- unsigned char *buffer)
+ struct queue_limits *lim, unsigned char *buffer)
{
unsigned char cmd[16];
struct scsi_sense_hdr sshdr;
@@ -2588,7 +2700,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
/* Lowest aligned logical block */
alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size;
- blk_queue_alignment_offset(sdp->request_queue, alignment);
+ lim->alignment_offset = alignment;
if (alignment && sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"physical block alignment offset: %u\n", alignment);
@@ -2599,7 +2711,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
if (buffer[14] & 0x40) /* LBPRZ */
sdkp->lbprz = 1;
- sd_config_discard(sdkp, SD_LBP_WS16);
+ sd_config_discard(sdkp, lim, SD_LBP_WS16);
}
sdkp->capacity = lba + 1;
@@ -2702,13 +2814,14 @@ static int sd_try_rc16_first(struct scsi_device *sdp)
* read disk capacity
*/
static void
-sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
+sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim,
+ unsigned char *buffer)
{
int sector_size;
struct scsi_device *sdp = sdkp->device;
if (sd_try_rc16_first(sdp)) {
- sector_size = read_capacity_16(sdkp, sdp, buffer);
+ sector_size = read_capacity_16(sdkp, sdp, lim, buffer);
if (sector_size == -EOVERFLOW)
goto got_data;
if (sector_size == -ENODEV)
@@ -2728,7 +2841,7 @@ sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
int old_sector_size = sector_size;
sd_printk(KERN_NOTICE, sdkp, "Very big device. "
"Trying to use READ CAPACITY(16).\n");
- sector_size = read_capacity_16(sdkp, sdp, buffer);
+ sector_size = read_capacity_16(sdkp, sdp, lim, buffer);
if (sector_size < 0) {
sd_printk(KERN_NOTICE, sdkp,
"Using 0xffffffff as device size\n");
@@ -2787,9 +2900,8 @@ got_data:
*/
sector_size = 512;
}
- blk_queue_logical_block_size(sdp->request_queue, sector_size);
- blk_queue_physical_block_size(sdp->request_queue,
- sdkp->physical_block_size);
+ lim->logical_block_size = sector_size;
+ lim->physical_block_size = sdkp->physical_block_size;
sdkp->device->sector_size = sector_size;
if (sdkp->capacity > 0xffffffff)
@@ -3195,11 +3307,30 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
return;
}
-/**
- * sd_read_block_limits - Query disk device for preferred I/O sizes.
- * @sdkp: disk to query
+static unsigned int sd_discard_mode(struct scsi_disk *sdkp)
+{
+ if (!sdkp->lbpvpd) {
+ /* LBP VPD page not provided */
+ if (sdkp->max_unmap_blocks)
+ return SD_LBP_UNMAP;
+ return SD_LBP_WS16;
+ }
+
+ /* LBP VPD page tells us what to use */
+ if (sdkp->lbpu && sdkp->max_unmap_blocks)
+ return SD_LBP_UNMAP;
+ if (sdkp->lbpws)
+ return SD_LBP_WS16;
+ if (sdkp->lbpws10)
+ return SD_LBP_WS10;
+ return SD_LBP_DISABLE;
+}
+
+/*
+ * Query disk device for preferred I/O sizes.
*/
-static void sd_read_block_limits(struct scsi_disk *sdkp)
+static void sd_read_block_limits(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
struct scsi_vpd *vpd;
@@ -3219,7 +3350,7 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]);
if (!sdkp->lbpme)
- goto out;
+ goto config_atomic;
lba_count = get_unaligned_be32(&vpd->data[20]);
desc_count = get_unaligned_be32(&vpd->data[24]);
@@ -3233,23 +3364,16 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
sdkp->unmap_alignment =
get_unaligned_be32(&vpd->data[32]) & ~(1 << 31);
- if (!sdkp->lbpvpd) { /* LBP VPD page not provided */
-
- if (sdkp->max_unmap_blocks)
- sd_config_discard(sdkp, SD_LBP_UNMAP);
- else
- sd_config_discard(sdkp, SD_LBP_WS16);
-
- } else { /* LBP VPD page tells us what to use */
- if (sdkp->lbpu && sdkp->max_unmap_blocks)
- sd_config_discard(sdkp, SD_LBP_UNMAP);
- else if (sdkp->lbpws)
- sd_config_discard(sdkp, SD_LBP_WS16);
- else if (sdkp->lbpws10)
- sd_config_discard(sdkp, SD_LBP_WS10);
- else
- sd_config_discard(sdkp, SD_LBP_DISABLE);
- }
+ sd_config_discard(sdkp, lim, sd_discard_mode(sdkp));
+
+config_atomic:
+ sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]);
+ sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]);
+ sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]);
+ sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]);
+ sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]);
+
+ sd_config_atomic(sdkp, lim);
}
out:
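/*
 * The offsets read under config_atomic: above are the atomic write
 * fields of the SBC Block Limits VPD page (0xb0), all in logical
 * blocks: MAXIMUM ATOMIC TRANSFER LENGTH at byte 44, ATOMIC ALIGNMENT
 * at 48, ATOMIC TRANSFER LENGTH GRANULARITY at 52, MAXIMUM ATOMIC
 * TRANSFER LENGTH WITH ATOMIC BOUNDARY at 56 and MAXIMUM ATOMIC
 * BOUNDARY SIZE at 60.
 */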
@@ -3268,13 +3392,10 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp)
rcu_read_unlock();
}
-/**
- * sd_read_block_characteristics - Query block dev. characteristics
- * @sdkp: disk to query
- */
-static void sd_read_block_characteristics(struct scsi_disk *sdkp)
+/* Query block device characteristics */
+static void sd_read_block_characteristics(struct scsi_disk *sdkp,
+ struct queue_limits *lim)
{
- struct request_queue *q = sdkp->disk->queue;
struct scsi_vpd *vpd;
u16 rot;
@@ -3290,37 +3411,13 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
sdkp->zoned = (vpd->data[8] >> 4) & 3;
rcu_read_unlock();
- if (rot == 1) {
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
- }
-
-
-#ifdef CONFIG_BLK_DEV_ZONED /* sd_probe rejects ZBD devices early otherwise */
- if (sdkp->device->type == TYPE_ZBC) {
- /*
- * Host-managed.
- */
- disk_set_zoned(sdkp->disk);
-
- /*
- * Per ZBC and ZAC specifications, writes in sequential write
- * required zones of host-managed devices must be aligned to
- * the device physical block size.
- */
- blk_queue_zone_write_granularity(q, sdkp->physical_block_size);
- } else {
- /*
- * Host-aware devices are treated as conventional.
- */
- WARN_ON_ONCE(blk_queue_is_zoned(q));
- }
-#endif /* CONFIG_BLK_DEV_ZONED */
+ if (rot == 1)
+ lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);
if (!sdkp->first_scan)
return;
- if (blk_queue_is_zoned(q))
+ if (sdkp->device->type == TYPE_ZBC)
sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n");
else if (sdkp->zoned == 1)
sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n");
@@ -3601,10 +3698,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
{
struct scsi_disk *sdkp = scsi_disk(disk);
struct scsi_device *sdp = sdkp->device;
- struct request_queue *q = sdkp->disk->queue;
sector_t old_capacity = sdkp->capacity;
+ struct queue_limits lim;
unsigned char *buffer;
- unsigned int dev_max, rw_max;
+ unsigned int dev_max;
+ int err;
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
"sd_revalidate_disk\n"));
@@ -3625,12 +3723,14 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_spinup_disk(sdkp);
+ lim = queue_limits_start_update(sdkp->disk->queue);
+
/*
* Without media there is no reason to ask; moreover, some devices
* react badly if we do.
*/
if (sdkp->media_present) {
- sd_read_capacity(sdkp, buffer);
+ sd_read_capacity(sdkp, &lim, buffer);
/*
* Some USB/UAS devices return generic values for mode pages
* until the media has been accessed. Trigger a READ operation
@@ -3644,15 +3744,14 @@ static int sd_revalidate_disk(struct gendisk *disk)
* cause this to be updated correctly and any device which
* doesn't support it should be treated as rotational.
*/
- blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
- blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
+ lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);
if (scsi_device_supports_vpd(sdp)) {
sd_read_block_provisioning(sdkp);
- sd_read_block_limits(sdkp);
+ sd_read_block_limits(sdkp, &lim);
sd_read_block_limits_ext(sdkp);
- sd_read_block_characteristics(sdkp);
- sd_zbc_read_zones(sdkp, buffer);
+ sd_read_block_characteristics(sdkp, &lim);
+ sd_zbc_read_zones(sdkp, &lim, buffer);
sd_read_cpr(sdkp);
}
@@ -3664,64 +3763,50 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_read_app_tag_own(sdkp, buffer);
sd_read_write_same(sdkp, buffer);
sd_read_security(sdkp, buffer);
- sd_config_protection(sdkp);
+ sd_config_protection(sdkp, &lim);
}
/*
* We now have all cache related info, determine how we deal
* with flush requests.
*/
- sd_set_flush_flag(sdkp);
+ sd_set_flush_flag(sdkp, &lim);
/* Initial block count limit based on CDB TRANSFER LENGTH field size. */
dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
/* Some devices report a maximum block count for READ/WRITE requests. */
dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
- q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
+ lim.max_dev_sectors = logical_to_sectors(sdp, dev_max);
if (sd_validate_min_xfer_size(sdkp))
- blk_queue_io_min(sdkp->disk->queue,
- logical_to_bytes(sdp, sdkp->min_xfer_blocks));
+ lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks);
else
- blk_queue_io_min(sdkp->disk->queue, 0);
-
- if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
- q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
- rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
- } else {
- q->limits.io_opt = 0;
- rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
- (sector_t)BLK_DEF_MAX_SECTORS_CAP);
- }
+ lim.io_min = 0;
/*
* Limit default to SCSI host optimal sector limit if set. There may be
* an impact on performance for when the size of a request exceeds this
* host limit.
*/
- rw_max = min_not_zero(rw_max, sdp->host->opt_sectors);
-
- /* Do not exceed controller limit */
- rw_max = min(rw_max, queue_max_hw_sectors(q));
-
- /*
- * Only update max_sectors if previously unset or if the current value
- * exceeds the capabilities of the hardware.
- */
- if (sdkp->first_scan ||
- q->limits.max_sectors > q->limits.max_dev_sectors ||
- q->limits.max_sectors > q->limits.max_hw_sectors) {
- q->limits.max_sectors = rw_max;
- q->limits.max_user_sectors = rw_max;
+ lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT;
+ if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
+ lim.io_opt = min_not_zero(lim.io_opt,
+ logical_to_bytes(sdp, sdkp->opt_xfer_blocks));
}
sdkp->first_scan = 0;
set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity));
- sd_config_write_same(sdkp);
+ sd_config_write_same(sdkp, &lim);
kfree(buffer);
+ blk_mq_freeze_queue(sdkp->disk->queue);
+ err = queue_limits_commit_update(sdkp->disk->queue, &lim);
+ blk_mq_unfreeze_queue(sdkp->disk->queue);
+ if (err)
+ return err;
+
/*
* For a zoned drive, revalidating the zones can be done only once
* the gendisk capacity is set. So if this fails, set back the gendisk
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 49dd600bfa48..36382eca941c 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -115,6 +115,13 @@ struct scsi_disk {
u32 max_unmap_blocks;
u32 unmap_granularity;
u32 unmap_alignment;
+
+ u32 max_atomic;
+ u32 atomic_alignment;
+ u32 atomic_granularity;
+ u32 max_atomic_with_boundary;
+ u32 max_atomic_boundary;
+
u32 index;
unsigned int physical_block_size;
unsigned int max_medium_access_timeouts;
@@ -148,6 +155,7 @@ struct scsi_disk {
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
unsigned rscs : 1; /* reduced stream control support */
+ unsigned use_atomic_write_boundary : 1;
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
@@ -220,26 +228,12 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec
return sector >> (ilog2(sdev->sector_size) - 9);
}
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-
-extern void sd_dif_config_host(struct scsi_disk *);
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline void sd_dif_config_host(struct scsi_disk *disk)
-{
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline int sd_is_zoned(struct scsi_disk *sdkp)
-{
- return sdkp->zoned == 1 || sdkp->device->type == TYPE_ZBC;
-}
+void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim);
#ifdef CONFIG_BLK_DEV_ZONED
-int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]);
+int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
+ u8 buf[SD_BUF_SIZE]);
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
unsigned char op, bool all);
@@ -250,7 +244,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
#else /* CONFIG_BLK_DEV_ZONED */
-static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
+static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
+ struct queue_limits *lim, u8 buf[SD_BUF_SIZE])
{
return 0;
}
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 1df847b5f747..ae6ce6f5d622 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -24,14 +24,15 @@
/*
* Configure exchange of protection information between OS and HBA.
*/
-void sd_dif_config_host(struct scsi_disk *sdkp)
+void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim)
{
struct scsi_device *sdp = sdkp->device;
- struct gendisk *disk = sdkp->disk;
u8 type = sdkp->protection_type;
- struct blk_integrity bi;
+ struct blk_integrity *bi = &lim->integrity;
int dif, dix;
+ memset(bi, 0, sizeof(*bi));
+
dif = scsi_host_dif_capable(sdp->host, type);
dix = scsi_host_dix_capable(sdp->host, type);
@@ -39,45 +40,33 @@ void sd_dif_config_host(struct scsi_disk *sdkp)
dif = 0; dix = 1;
}
- if (!dix) {
- blk_integrity_unregister(disk);
+ if (!dix)
return;
- }
-
- memset(&bi, 0, sizeof(bi));
/* Enable DMA of protection information */
- if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) {
- if (type == T10_PI_TYPE3_PROTECTION)
- bi.profile = &t10_pi_type3_ip;
- else
- bi.profile = &t10_pi_type1_ip;
+ if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP)
+ bi->csum_type = BLK_INTEGRITY_CSUM_IP;
+ else
+ bi->csum_type = BLK_INTEGRITY_CSUM_CRC;
- bi.flags |= BLK_INTEGRITY_IP_CHECKSUM;
- } else
- if (type == T10_PI_TYPE3_PROTECTION)
- bi.profile = &t10_pi_type3_crc;
- else
- bi.profile = &t10_pi_type1_crc;
+ if (type != T10_PI_TYPE3_PROTECTION)
+ bi->flags |= BLK_INTEGRITY_REF_TAG;
- bi.tuple_size = sizeof(struct t10_pi_tuple);
+ bi->tuple_size = sizeof(struct t10_pi_tuple);
if (dif && type) {
- bi.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
+ bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
if (!sdkp->ATO)
- goto out;
+ return;
if (type == T10_PI_TYPE3_PROTECTION)
- bi.tag_size = sizeof(u16) + sizeof(u32);
+ bi->tag_size = sizeof(u16) + sizeof(u32);
else
- bi.tag_size = sizeof(u16);
+ bi->tag_size = sizeof(u16);
}
sd_first_printk(KERN_NOTICE, sdkp,
"Enabling DIX %s, application tag size %u bytes\n",
- bi.profile->name, bi.tag_size);
-out:
- blk_integrity_register(disk, &bi);
+ blk_integrity_profile_name(bi), bi->tag_size);
}
-
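/*
 * How the old profile pointers map onto the new fields used above:
 *
 *   t10_pi_type1_ip  -> csum_type = BLK_INTEGRITY_CSUM_IP  + BLK_INTEGRITY_REF_TAG
 *   t10_pi_type3_ip  -> csum_type = BLK_INTEGRITY_CSUM_IP
 *   t10_pi_type1_crc -> csum_type = BLK_INTEGRITY_CSUM_CRC + BLK_INTEGRITY_REF_TAG
 *   t10_pi_type3_crc -> csum_type = BLK_INTEGRITY_CSUM_CRC
 *
 * Type 2 behaves like Type 1 here: only Type 3 omits the reference tag.
 */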
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 806036e48abe..c8b9654d30f0 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -232,7 +232,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
int zone_idx = 0;
int ret;
- if (!sd_is_zoned(sdkp))
+ if (sdkp->device->type != TYPE_ZBC)
/* Not a zoned device */
return -EOPNOTSUPP;
@@ -300,7 +300,7 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
sector_t sector = blk_rq_pos(rq);
- if (!sd_is_zoned(sdkp))
+ if (sdkp->device->type != TYPE_ZBC)
/* Not a zoned device */
return BLK_STS_IOERR;
@@ -521,7 +521,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
static void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
- if (!sd_is_zoned(sdkp) || !sdkp->capacity)
+ if (sdkp->device->type != TYPE_ZBC || !sdkp->capacity)
return;
if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
@@ -565,12 +565,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
sdkp->zone_info.zone_blocks = zone_blocks;
sdkp->zone_info.nr_zones = nr_zones;
- blk_queue_chunk_sectors(q,
- logical_to_sectors(sdkp->device, zone_blocks));
-
- /* Enable block layer zone append emulation */
- blk_queue_max_zone_append_sectors(q, 0);
-
flags = memalloc_noio_save();
ret = blk_revalidate_disk_zones(disk);
memalloc_noio_restore(flags);
@@ -588,27 +582,31 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
/**
* sd_zbc_read_zones - Read zone information and update the request queue
* @sdkp: SCSI disk pointer.
+ * @lim: queue limits to read into
* @buf: 512 byte buffer used for storing SCSI command output.
*
* Read zone information and update the request queue zone characteristics and
* also the zoned device information in *sdkp. Called by sd_revalidate_disk()
* before the gendisk capacity has been set.
*/
-int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
+int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
+ u8 buf[SD_BUF_SIZE])
{
- struct gendisk *disk = sdkp->disk;
- struct request_queue *q = disk->queue;
unsigned int nr_zones;
u32 zone_blocks = 0;
int ret;
- if (!sd_is_zoned(sdkp)) {
- /*
- * Device managed or normal SCSI disk, no special handling
- * required.
- */
+ if (sdkp->device->type != TYPE_ZBC)
return 0;
- }
+
+ lim->features |= BLK_FEAT_ZONED;
+
+ /*
+ * Per ZBC and ZAC specifications, writes in sequential write required
+ * zones of host-managed devices must be aligned to the device physical
+ * block size.
+ */
+ lim->zone_write_granularity = sdkp->physical_block_size;
/* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
sdkp->device->use_16_for_rw = 1;
@@ -625,18 +623,20 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
if (ret != 0)
goto err;
- /* The drive satisfies the kernel restrictions: set it up */
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
- if (sdkp->zones_max_open == U32_MAX)
- disk_set_max_open_zones(disk, 0);
- else
- disk_set_max_open_zones(disk, sdkp->zones_max_open);
- disk_set_max_active_zones(disk, 0);
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
-
sdkp->early_zone_info.nr_zones = nr_zones;
sdkp->early_zone_info.zone_blocks = zone_blocks;
+ /* The drive satisfies the kernel restrictions: set it up */
+ if (sdkp->zones_max_open == U32_MAX)
+ lim->max_open_zones = 0;
+ else
+ lim->max_open_zones = sdkp->zones_max_open;
+ lim->max_active_zones = 0;
+ lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);
+ /* Enable block layer zone append emulation */
+ lim->max_zone_append_sectors = 0;
+
return 0;
err:
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 7ab000942b97..3f491019103e 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -111,7 +111,7 @@ static struct lock_class_key sr_bio_compl_lkclass;
static int sr_open(struct cdrom_device_info *, int);
static void sr_release(struct cdrom_device_info *);
-static void get_sectorsize(struct scsi_cd *);
+static int get_sectorsize(struct scsi_cd *);
static int get_capabilities(struct scsi_cd *);
static unsigned int sr_check_events(struct cdrom_device_info *cdi,
@@ -473,15 +473,15 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt)
return BLK_STS_IOERR;
}
-static void sr_revalidate_disk(struct scsi_cd *cd)
+static int sr_revalidate_disk(struct scsi_cd *cd)
{
struct scsi_sense_hdr sshdr;
/* if the unit is not ready, nothing more to do */
if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
- return;
+ return 0;
sr_cd_check(&cd->cdi);
- get_sectorsize(cd);
+ return get_sectorsize(cd);
}
static int sr_block_open(struct gendisk *disk, blk_mode_t mode)
@@ -494,13 +494,16 @@ static int sr_block_open(struct gendisk *disk, blk_mode_t mode)
return -ENXIO;
scsi_autopm_get_device(sdev);
- if (disk_check_media_change(disk))
- sr_revalidate_disk(cd);
+ if (disk_check_media_change(disk)) {
+ ret = sr_revalidate_disk(cd);
+ if (ret)
+ goto out;
+ }
mutex_lock(&cd->lock);
ret = cdrom_open(&cd->cdi, mode);
mutex_unlock(&cd->lock);
-
+out:
scsi_autopm_put_device(sdev);
if (ret)
scsi_device_put(cd->device);
@@ -685,7 +688,9 @@ static int sr_probe(struct device *dev)
blk_pm_runtime_init(sdev->request_queue, dev);
dev_set_drvdata(dev, cd);
- sr_revalidate_disk(cd);
+ error = sr_revalidate_disk(cd);
+ if (error)
+ goto unregister_cdrom;
error = device_add_disk(&sdev->sdev_gendev, disk, NULL);
if (error)
@@ -714,13 +719,14 @@ fail:
}
-static void get_sectorsize(struct scsi_cd *cd)
+static int get_sectorsize(struct scsi_cd *cd)
{
+ struct request_queue *q = cd->device->request_queue;
static const u8 cmd[10] = { READ_CAPACITY };
unsigned char buffer[8] = { };
- int the_result;
+ struct queue_limits lim;
+ int err;
int sector_size;
- struct request_queue *queue;
struct scsi_failure failure_defs[] = {
{
.result = SCMD_FAILURE_RESULT_ANY,
@@ -736,10 +742,10 @@ static void get_sectorsize(struct scsi_cd *cd)
};
/* Do the command and wait.. */
- the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer,
+ err = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer,
sizeof(buffer), SR_TIMEOUT, MAX_RETRIES,
&exec_args);
- if (the_result) {
+ if (err) {
cd->capacity = 0x1fffff;
sector_size = 2048; /* A guess, just in case */
} else {
@@ -789,10 +795,12 @@ static void get_sectorsize(struct scsi_cd *cd)
set_capacity(cd->disk, cd->capacity);
}
- queue = cd->device->request_queue;
- blk_queue_logical_block_size(queue, sector_size);
-
- return;
+ lim = queue_limits_start_update(q);
+ lim.logical_block_size = sector_size;
+ blk_mq_freeze_queue(q);
+ err = queue_limits_commit_update(q, &lim);
+ blk_mq_unfreeze_queue(q);
+ return err;
}
static int get_capabilities(struct scsi_cd *cd)
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 7f6ca8177845..a3e09adc4e76 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -148,35 +148,38 @@ static int iblock_configure_device(struct se_device *dev)
dev->dev_attrib.is_nonrot = 1;
bi = bdev_get_integrity(bd);
- if (bi) {
- struct bio_set *bs = &ib_dev->ibd_bio_set;
-
- if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") ||
- !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) {
- pr_err("IBLOCK export of blk_integrity: %s not"
- " supported\n", bi->profile->name);
- ret = -ENOSYS;
- goto out_blkdev_put;
- }
+ if (!bi)
+ return 0;
- if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-CRC")) {
- dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT;
- } else if (!strcmp(bi->profile->name, "T10-DIF-TYPE1-CRC")) {
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_IP:
+ pr_err("IBLOCK export of blk_integrity: %s not supported\n",
+ blk_integrity_profile_name(bi));
+ ret = -ENOSYS;
+ goto out_blkdev_put;
+ case BLK_INTEGRITY_CSUM_CRC:
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE1_PROT;
- }
+ else
+ dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT;
+ break;
+ default:
+ break;
+ }
- if (dev->dev_attrib.pi_prot_type) {
- if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) {
- pr_err("Unable to allocate bioset for PI\n");
- ret = -ENOMEM;
- goto out_blkdev_put;
- }
- pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
- &bs->bio_integrity_pool);
+ if (dev->dev_attrib.pi_prot_type) {
+ struct bio_set *bs = &ib_dev->ibd_bio_set;
+
+ if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) {
+ pr_err("Unable to allocate bioset for PI\n");
+ ret = -ENOMEM;
+ goto out_blkdev_put;
}
- dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
+ pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
+ &bs->bio_integrity_pool);
}
+ dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
return 0;
out_blkdev_put:
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 46433ecf0c4d..702fa1996992 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -5193,17 +5193,19 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth)
}
/**
- * ufshcd_slave_configure - adjust SCSI device configurations
+ * ufshcd_device_configure - adjust SCSI device configurations
* @sdev: pointer to SCSI device
+ * @lim: queue limits
*
* Return: 0 (success).
*/
-static int ufshcd_slave_configure(struct scsi_device *sdev)
+static int ufshcd_device_configure(struct scsi_device *sdev,
+ struct queue_limits *lim)
{
struct ufs_hba *hba = shost_priv(sdev->host);
struct request_queue *q = sdev->request_queue;
- blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
+ lim->dma_pad_mask = PRDT_DATA_BYTE_COUNT_PAD - 1;
/*
* Block runtime-pm until all consumers are added.
@@ -8910,7 +8912,7 @@ static const struct scsi_host_template ufshcd_driver_template = {
.queuecommand = ufshcd_queuecommand,
.mq_poll = ufshcd_poll,
.slave_alloc = ufshcd_slave_alloc,
- .slave_configure = ufshcd_slave_configure,
+ .device_configure = ufshcd_device_configure,
.slave_destroy = ufshcd_slave_destroy,
.change_queue_depth = ufshcd_change_queue_depth,
.eh_abort_handler = ufshcd_abort,
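/*
 * The ->device_configure() conversion above follows the new host
 * template hook, which hands the driver the queue_limits to fill in
 * instead of letting it poke the request queue directly. A minimal
 * sketch for a hypothetical driver (pad value is illustrative):
 */
static int mydrv_device_configure(struct scsi_device *sdev,
				  struct queue_limits *lim)
{
	lim->dma_pad_mask = 3;	/* hypothetical 4-byte padding requirement */
	return 0;
}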