summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2021-08-31 04:01:46 +0200
committer Linus Torvalds <torvalds@linux-foundation.org> 2021-08-31 04:01:46 +0200
commit 9a1d6c9e3f53732f2f48f4424e028642db616663 (patch)
tree 5308c856a8b7a56b705cde940eb946ee88e1ca88 /drivers
parent Merge tag 'for-5.15/block-2021-08-30' of git://git.kernel.dk/linux-block (diff)
parent Merge tag 'floppy-for-5.15' of https://github.com/evdenis/linux-floppy into f... (diff)
download linux-9a1d6c9e3f53732f2f48f4424e028642db616663.tar.xz
linux-9a1d6c9e3f53732f2f48f4424e028642db616663.zip
Merge tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: "Sitting on top of the core block changes, here are the driver changes for the 5.15 merge window: - NVMe updates via Christoph: - suspend improvements for devices with an HMB (Keith Busch) - handle double completions more gracefully (Sagi Grimberg) - cleanup the selects for the nvme core code a bit (Sagi Grimberg) - don't update queue count when failing to set io queues (Ruozhu Li) - various nvmet connect fixes (Amit Engel) - cleanup lightnvm leftovers (Keith Busch, me) - small cleanups (Colin Ian King, Hou Pu) - add tracing for the Set Features command (Hou Pu) - CMB sysfs cleanups (Keith Busch) - add a mutex_destroy call (Keith Busch) - remove lightnvm subsystem. It's served its purpose and ultimately led to zoned nvme support, we no longer need it (Christoph) - revert floppy O_NDELAY fix (Denis) - nbd fixes (Hou, Pavel, Baokun) - nbd locking fixes (Tetsuo) - nbd device removal fixes (Christoph) - raid10 rcu warning fix (Xiao) - raid1 write behind fix (Guoqing) - rnbd fixes (Gioh, Md Haris) - misc fixes (Colin)" * tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block: (42 commits) Revert "floppy: reintroduce O_NDELAY fix" raid1: ensure write behind bio has less than BIO_MAX_VECS sectors md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard nbd: remove nbd->destroy_complete nbd: only return usable devices from nbd_find_unused nbd: set nbd->index before releasing nbd_index_mutex nbd: prevent IDR lookups from finding partially initialized devices nbd: reset NBD to NULL when restarting in nbd_genl_connect nbd: add missing locking to the nbd_dev_add error path nvme: remove the unused NVME_NS_* enum nvme: remove nvm_ndev from ns nvme: Have NVME_FABRICS select NVME_CORE instead of transport drivers block: nbd: add sanity check for first_minor nvmet: check that host sqsize does not exceed ctrl MQES nvmet: avoid duplicate qid in connect cmd nvmet: pass back cntlid on successful completion nvme-rdma: 
don't update queue count when failing to set io queues nvme-tcp: don't update queue count when failing to set io queues nvme-tcp: pair send_mutex init with destroy nvme: allow user toggling hmb usage ...
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/Kconfig 2
-rw-r--r-- drivers/Makefile 1
-rw-r--r-- drivers/block/floppy.c 30
-rw-r--r-- drivers/block/nbd.c 178
-rw-r--r-- drivers/block/rnbd/rnbd-clt-sysfs.c 33
-rw-r--r-- drivers/block/rnbd/rnbd-clt.c 2
-rw-r--r-- drivers/block/rnbd/rnbd-srv-sysfs.c 14
-rw-r--r-- drivers/block/xen-blkfront.c 1
-rw-r--r-- drivers/lightnvm/Kconfig 44
-rw-r--r-- drivers/lightnvm/Makefile 11
-rw-r--r-- drivers/lightnvm/core.c 1440
-rw-r--r-- drivers/lightnvm/pblk-cache.c 137
-rw-r--r-- drivers/lightnvm/pblk-core.c 2151
-rw-r--r-- drivers/lightnvm/pblk-gc.c 726
-rw-r--r-- drivers/lightnvm/pblk-init.c 1324
-rw-r--r-- drivers/lightnvm/pblk-map.c 210
-rw-r--r-- drivers/lightnvm/pblk-rb.c 858
-rw-r--r-- drivers/lightnvm/pblk-read.c 474
-rw-r--r-- drivers/lightnvm/pblk-recovery.c 874
-rw-r--r-- drivers/lightnvm/pblk-rl.c 254
-rw-r--r-- drivers/lightnvm/pblk-sysfs.c 728
-rw-r--r-- drivers/lightnvm/pblk-trace.h 145
-rw-r--r-- drivers/lightnvm/pblk-write.c 665
-rw-r--r-- drivers/lightnvm/pblk.h 1358
-rw-r--r-- drivers/md/raid1.c 19
-rw-r--r-- drivers/md/raid10.c 14
-rw-r--r-- drivers/nvme/host/Kconfig 4
-rw-r--r-- drivers/nvme/host/Makefile 1
-rw-r--r-- drivers/nvme/host/core.c 16
-rw-r--r-- drivers/nvme/host/fabrics.c 1
-rw-r--r-- drivers/nvme/host/ioctl.c 4
-rw-r--r-- drivers/nvme/host/lightnvm.c 1274
-rw-r--r-- drivers/nvme/host/nvme.h 79
-rw-r--r-- drivers/nvme/host/pci.c 187
-rw-r--r-- drivers/nvme/host/rdma.c 8
-rw-r--r-- drivers/nvme/host/tcp.c 44
-rw-r--r-- drivers/nvme/host/trace.c 18
-rw-r--r-- drivers/nvme/target/Kconfig 2
-rw-r--r-- drivers/nvme/target/core.c 1
-rw-r--r-- drivers/nvme/target/fabrics-cmd.c 38
-rw-r--r-- drivers/nvme/target/loop.c 4
-rw-r--r-- drivers/nvme/target/trace.c 18
-rw-r--r-- drivers/nvme/target/zns.c 5
43 files changed, 442 insertions, 12955 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 8bad63417a50..30d2db37cc87 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
-source "drivers/lightnvm/Kconfig"
-
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 27c018bdf4de..be5d40ae1488 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
-obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 87460e0e5c72..fef79ea52e3e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4029,23 +4029,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (fdc_state[FDC(drive)].rawcmd == 1)
fdc_state[FDC(drive)].rawcmd = 2;
- if (mode & (FMODE_READ|FMODE_WRITE)) {
- drive_state[drive].last_checked = 0;
- clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
- if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
- goto out;
- if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
+ if (!(mode & FMODE_NDELAY)) {
+ if (mode & (FMODE_READ|FMODE_WRITE)) {
+ drive_state[drive].last_checked = 0;
+ clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
+ &drive_state[drive].flags);
+ if (bdev_check_media_change(bdev))
+ floppy_revalidate(bdev->bd_disk);
+ if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
+ goto out;
+ if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
+ goto out;
+ }
+ res = -EROFS;
+ if ((mode & FMODE_WRITE) &&
+ !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}
-
- res = -EROFS;
-
- if ((mode & FMODE_WRITE) &&
- !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
- goto out;
-
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
return 0;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 19f5d5a8b16a..5170a630778d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -49,6 +49,7 @@
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
+static struct workqueue_struct *nbd_del_wq;
static int nbd_total_devices = 0;
struct nbd_sock {
@@ -113,12 +114,12 @@ struct nbd_device {
struct mutex config_lock;
struct gendisk *disk;
struct workqueue_struct *recv_workq;
+ struct work_struct remove_work;
struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;
- struct completion *destroy_complete;
unsigned long flags;
char *backend;
@@ -237,32 +238,36 @@ static void nbd_dev_remove(struct nbd_device *nbd)
{
struct gendisk *disk = nbd->disk;
- if (disk) {
- del_gendisk(disk);
- blk_cleanup_disk(disk);
- blk_mq_free_tag_set(&nbd->tag_set);
- }
+ del_gendisk(disk);
+ blk_cleanup_disk(disk);
+ blk_mq_free_tag_set(&nbd->tag_set);
/*
- * Place this in the last just before the nbd is freed to
- * make sure that the disk and the related kobject are also
- * totally removed to avoid duplicate creation of the same
- * one.
+ * Remove from idr after del_gendisk() completes, so if the same ID is
+ * reused, the following add_disk() will succeed.
*/
- if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
- complete(nbd->destroy_complete);
+ mutex_lock(&nbd_index_mutex);
+ idr_remove(&nbd_index_idr, nbd->index);
+ mutex_unlock(&nbd_index_mutex);
kfree(nbd);
}
+static void nbd_dev_remove_work(struct work_struct *work)
+{
+ nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
+}
+
static void nbd_put(struct nbd_device *nbd)
{
- if (refcount_dec_and_mutex_lock(&nbd->refs,
- &nbd_index_mutex)) {
- idr_remove(&nbd_index_idr, nbd->index);
+ if (!refcount_dec_and_test(&nbd->refs))
+ return;
+
+ /* Call del_gendisk() asynchronously to prevent deadlock */
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
+ queue_work(nbd_del_wq, &nbd->remove_work);
+ else
nbd_dev_remove(nbd);
- mutex_unlock(&nbd_index_mutex);
- }
}
static int nbd_disconnected(struct nbd_config *config)
@@ -1388,6 +1393,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
unsigned int cmd, unsigned long arg)
{
struct nbd_config *config = nbd->config;
+ loff_t bytesize;
switch (cmd) {
case NBD_DISCONNECT:
@@ -1402,8 +1408,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_SIZE:
return nbd_set_size(nbd, arg, config->blksize);
case NBD_SET_SIZE_BLOCKS:
- return nbd_set_size(nbd, arg * config->blksize,
- config->blksize);
+ if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ return -EINVAL;
+ return nbd_set_size(nbd, bytesize, config->blksize);
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1665,7 +1672,7 @@ static const struct blk_mq_ops nbd_mq_ops = {
.timeout = nbd_xmit_timeout,
};
-static int nbd_dev_add(int index)
+static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
{
struct nbd_device *nbd;
struct gendisk *disk;
@@ -1683,13 +1690,14 @@ static int nbd_dev_add(int index)
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
- nbd->destroy_complete = NULL;
+ INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
nbd->backend = NULL;
err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
goto out_free_nbd;
+ mutex_lock(&nbd_index_mutex);
if (index >= 0) {
err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
GFP_KERNEL);
@@ -1700,9 +1708,10 @@ static int nbd_dev_add(int index)
if (err >= 0)
index = err;
}
+ nbd->index = index;
+ mutex_unlock(&nbd_index_mutex);
if (err < 0)
goto out_free_tags;
- nbd->index = index;
disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
if (IS_ERR(disk)) {
@@ -1726,38 +1735,65 @@ static int nbd_dev_add(int index)
mutex_init(&nbd->config_lock);
refcount_set(&nbd->config_refs, 0);
- refcount_set(&nbd->refs, 1);
+ /*
+ * Start out with zero references to keep other threads from using
+ * this device until it is fully initialized.
+ */
+ refcount_set(&nbd->refs, 0);
INIT_LIST_HEAD(&nbd->list);
disk->major = NBD_MAJOR;
+
+ /* Too big first_minor can cause duplicate creation of
+ * sysfs files/links, since first_minor will be truncated to
+ * byte in __device_add_disk().
+ */
disk->first_minor = index << part_shift;
+ if (disk->first_minor > 0xff) {
+ err = -EINVAL;
+ goto out_free_idr;
+ }
+
disk->minors = 1 << part_shift;
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
add_disk(disk);
+
+ /*
+ * Now publish the device.
+ */
+ refcount_set(&nbd->refs, refs);
nbd_total_devices++;
- return index;
+ return nbd;
out_free_idr:
+ mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
+ mutex_unlock(&nbd_index_mutex);
out_free_tags:
blk_mq_free_tag_set(&nbd->tag_set);
out_free_nbd:
kfree(nbd);
out:
- return err;
+ return ERR_PTR(err);
}
-static int find_free_cb(int id, void *ptr, void *data)
+static struct nbd_device *nbd_find_get_unused(void)
{
- struct nbd_device *nbd = ptr;
- struct nbd_device **found = data;
+ struct nbd_device *nbd;
+ int id;
- if (!refcount_read(&nbd->config_refs)) {
- *found = nbd;
- return 1;
+ lockdep_assert_held(&nbd_index_mutex);
+
+ idr_for_each_entry(&nbd_index_idr, nbd, id) {
+ if (refcount_read(&nbd->config_refs) ||
+ test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
+ continue;
+ if (refcount_inc_not_zero(&nbd->refs))
+ return nbd;
}
- return 0;
+
+ return NULL;
}
/* Netlink interface. */
@@ -1806,8 +1842,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
- DECLARE_COMPLETION_ONSTACK(destroy_complete);
- struct nbd_device *nbd = NULL;
+ struct nbd_device *nbd;
struct nbd_config *config;
int index = -1;
int ret;
@@ -1829,55 +1864,29 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
again:
mutex_lock(&nbd_index_mutex);
if (index == -1) {
- ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
- if (ret == 0) {
- int new_index;
- new_index = nbd_dev_add(-1);
- if (new_index < 0) {
- mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: failed to add new device\n");
- return new_index;
- }
- nbd = idr_find(&nbd_index_idr, new_index);
- }
+ nbd = nbd_find_get_unused();
} else {
nbd = idr_find(&nbd_index_idr, index);
- if (!nbd) {
- ret = nbd_dev_add(index);
- if (ret < 0) {
+ if (nbd) {
+ if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+ test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
+ !refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- printk(KERN_ERR "nbd: failed to add new device\n");
- return ret;
+ pr_err("nbd: device at index %d is going down\n",
+ index);
+ return -EINVAL;
}
- nbd = idr_find(&nbd_index_idr, index);
}
}
- if (!nbd) {
- printk(KERN_ERR "nbd: couldn't find device at index %d\n",
- index);
- mutex_unlock(&nbd_index_mutex);
- return -EINVAL;
- }
-
- if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
- test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
- nbd->destroy_complete = &destroy_complete;
- mutex_unlock(&nbd_index_mutex);
-
- /* Wait untill the the nbd stuff is totally destroyed */
- wait_for_completion(&destroy_complete);
- goto again;
- }
+ mutex_unlock(&nbd_index_mutex);
- if (!refcount_inc_not_zero(&nbd->refs)) {
- mutex_unlock(&nbd_index_mutex);
- if (index == -1)
- goto again;
- printk(KERN_ERR "nbd: device at index %d is going down\n",
- index);
- return -EINVAL;
+ if (!nbd) {
+ nbd = nbd_dev_add(index, 2);
+ if (IS_ERR(nbd)) {
+ pr_err("nbd: failed to add new device\n");
+ return PTR_ERR(nbd);
+ }
}
- mutex_unlock(&nbd_index_mutex);
mutex_lock(&nbd->config_lock);
if (refcount_read(&nbd->config_refs)) {
@@ -2424,16 +2433,21 @@ static int __init nbd_init(void)
if (register_blkdev(NBD_MAJOR, "nbd"))
return -EIO;
+ nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
+ if (!nbd_del_wq) {
+ unregister_blkdev(NBD_MAJOR, "nbd");
+ return -ENOMEM;
+ }
+
if (genl_register_family(&nbd_genl_family)) {
+ destroy_workqueue(nbd_del_wq);
unregister_blkdev(NBD_MAJOR, "nbd");
return -EINVAL;
}
nbd_dbg_init();
- mutex_lock(&nbd_index_mutex);
for (i = 0; i < nbds_max; i++)
- nbd_dev_add(i);
- mutex_unlock(&nbd_index_mutex);
+ nbd_dev_add(i, 1);
return 0;
}
@@ -2442,7 +2456,10 @@ static int nbd_exit_cb(int id, void *ptr, void *data)
struct list_head *list = (struct list_head *)data;
struct nbd_device *nbd = ptr;
- list_add_tail(&nbd->list, list);
+ /* Skip nbd that is being removed asynchronously */
+ if (refcount_read(&nbd->refs))
+ list_add_tail(&nbd->list, list);
+
return 0;
}
@@ -2465,6 +2482,9 @@ static void __exit nbd_cleanup(void)
nbd_put(nbd);
}
+ /* Also wait for nbd_dev_remove_work() to complete */
+ destroy_workqueue(nbd_del_wq);
+
idr_destroy(&nbd_index_idr);
genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 324afdd63a96..4b93fd83bf79 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -227,17 +227,17 @@ static ssize_t state_show(struct kobject *kobj,
switch (dev->dev_state) {
case DEV_STATE_INIT:
- return snprintf(page, PAGE_SIZE, "init\n");
+ return sysfs_emit(page, "init\n");
case DEV_STATE_MAPPED:
/* TODO fix cli tool before changing to proper state */
- return snprintf(page, PAGE_SIZE, "open\n");
+ return sysfs_emit(page, "open\n");
case DEV_STATE_MAPPED_DISCONNECTED:
/* TODO fix cli tool before changing to proper state */
- return snprintf(page, PAGE_SIZE, "closed\n");
+ return sysfs_emit(page, "closed\n");
case DEV_STATE_UNMAPPED:
- return snprintf(page, PAGE_SIZE, "unmapped\n");
+ return sysfs_emit(page, "unmapped\n");
default:
- return snprintf(page, PAGE_SIZE, "unknown\n");
+ return sysfs_emit(page, "unknown\n");
}
}
@@ -263,7 +263,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname);
+ return sysfs_emit(page, "%s\n", dev->pathname);
}
static struct kobj_attribute rnbd_clt_mapping_path_attr =
@@ -276,8 +276,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return snprintf(page, PAGE_SIZE, "%s\n",
- rnbd_access_mode_str(dev->access_mode));
+ return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode));
}
static struct kobj_attribute rnbd_clt_access_mode =
@@ -286,8 +285,8 @@ static struct kobj_attribute rnbd_clt_access_mode =
static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo <normal|force> > %s\n",
+ attr->attr.name);
}
static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj,
@@ -357,9 +356,8 @@ static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- return scnprintf(page, PAGE_SIZE,
- "Usage: echo <new size in sectors> > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo <new size in sectors> > %s\n",
+ attr->attr.name);
}
static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
@@ -390,8 +388,7 @@ static struct kobj_attribute rnbd_clt_resize_dev_attr =
static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo <1> > %s\n", attr->attr.name);
}
static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj,
@@ -436,7 +433,7 @@ static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname);
+ return sysfs_emit(page, "%s\n", dev->sess->sessname);
}
static struct kobj_attribute rnbd_clt_session_attr =
@@ -499,8 +496,8 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *page)
{
- return scnprintf(page, PAGE_SIZE,
- "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
+ return sysfs_emit(page,
+ "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index e9cc413495f0..bd4a41afbbfc 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -271,7 +271,7 @@ unlock:
*/
if (cpu_q)
*cpup = cpu_q->cpu;
- put_cpu_var(sess->cpu_rr);
+ put_cpu_ptr(sess->cpu_rr);
if (q)
rnbd_clt_dev_requeue(q);
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index acf5fced11ef..4db98e0e76f0 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -90,8 +90,8 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%d\n",
- !(sess_dev->open_flags & FMODE_WRITE));
+ return sysfs_emit(page, "%d\n",
+ !(sess_dev->open_flags & FMODE_WRITE));
}
static struct kobj_attribute rnbd_srv_dev_session_ro_attr =
@@ -105,8 +105,8 @@ static ssize_t access_mode_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n",
- rnbd_access_mode_str(sess_dev->access_mode));
+ return sysfs_emit(page, "%s\n",
+ rnbd_access_mode_str(sess_dev->access_mode));
}
static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr =
@@ -119,7 +119,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname);
+ return sysfs_emit(page, "%s\n", sess_dev->pathname);
}
static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
@@ -128,8 +128,8 @@ static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
- attr->attr.name);
+ return sysfs_emit(page, "Usage: echo 1 > %s\n",
+ attr->attr.name);
}
static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d83fee21f6c5..715bfa8aca7f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1092,7 +1092,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = xlbd_reserve_minors(minor, nr_minors);
if (err)
return err;
- err = -ENODEV;
memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
deleted file mode 100644
index 04caa0f2d445..000000000000
--- a/drivers/lightnvm/Kconfig
+++ /dev/null
@@ -1,44 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Open-Channel SSD NVM configuration
-#
-
-menuconfig NVM
- bool "Open-Channel SSD target support (DEPRECATED)"
- depends on BLOCK
- help
- Say Y here to get to enable Open-channel SSDs.
-
- Open-Channel SSDs implement a set of extension to SSDs, that
- exposes direct access to the underlying non-volatile memory.
-
- If you say N, all options in this submenu will be skipped and disabled
- only do this if you know what you are doing.
-
- This code is deprecated and will be removed in Linux 5.15.
-
-if NVM
-
-config NVM_PBLK
- tristate "Physical Block Device Open-Channel SSD target"
- select CRC32
- help
- Allows an open-channel SSD to be exposed as a block device to the
- host. The target assumes the device exposes raw flash and must be
- explicitly managed by the host.
-
- Please note the disk format is considered EXPERIMENTAL for now.
-
-if NVM_PBLK
-
-config NVM_PBLK_DEBUG
- bool "PBlk Debug Support"
- default n
- help
- Enables debug support for pblk. This includes extra checks, more
- vocal error messages, and extra tracking fields in the pblk sysfs
- entries.
-
-endif # NVM_PBLK_DEBUG
-
-endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
deleted file mode 100644
index 97d9d7c71550..000000000000
--- a/drivers/lightnvm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for Open-Channel SSDs.
-#
-
-obj-$(CONFIG_NVM) := core.o
-obj-$(CONFIG_NVM_PBLK) += pblk.o
-pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
- pblk-write.o pblk-cache.o pblk-read.o \
- pblk-gc.o pblk-recovery.o pblk-map.o \
- pblk-rl.o pblk-sysfs.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
deleted file mode 100644
index cf8a75494833..000000000000
--- a/drivers/lightnvm/core.c
+++ /dev/null
@@ -1,1440 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 IT University of Copenhagen. All rights reserved.
- * Initial release: Matias Bjorling <m@bjorling.me>
- */
-
-#define pr_fmt(fmt) "nvm: " fmt
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/sem.h>
-#include <linux/bitmap.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/miscdevice.h>
-#include <linux/lightnvm.h>
-#include <linux/sched/sysctl.h>
-
-static LIST_HEAD(nvm_tgt_types);
-static DECLARE_RWSEM(nvm_tgtt_lock);
-static LIST_HEAD(nvm_devices);
-static DECLARE_RWSEM(nvm_lock);
-
-/* Map between virtual and physical channel and lun */
-struct nvm_ch_map {
- int ch_off;
- int num_lun;
- int *lun_offs;
-};
-
-struct nvm_dev_map {
- struct nvm_ch_map *chnls;
- int num_ch;
-};
-
-static void nvm_free(struct kref *ref);
-
-static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
-{
- struct nvm_target *tgt;
-
- list_for_each_entry(tgt, &dev->targets, list)
- if (!strcmp(name, tgt->disk->disk_name))
- return tgt;
-
- return NULL;
-}
-
-static bool nvm_target_exists(const char *name)
-{
- struct nvm_dev *dev;
- struct nvm_target *tgt;
- bool ret = false;
-
- down_write(&nvm_lock);
- list_for_each_entry(dev, &nvm_devices, devices) {
- mutex_lock(&dev->mlock);
- list_for_each_entry(tgt, &dev->targets, list) {
- if (!strcmp(name, tgt->disk->disk_name)) {
- ret = true;
- mutex_unlock(&dev->mlock);
- goto out;
- }
- }
- mutex_unlock(&dev->mlock);
- }
-
-out:
- up_write(&nvm_lock);
- return ret;
-}
-
-static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end)
-{
- int i;
-
- for (i = lun_begin; i <= lun_end; i++) {
- if (test_and_set_bit(i, dev->lun_map)) {
- pr_err("lun %d already allocated\n", i);
- goto err;
- }
- }
-
- return 0;
-err:
- while (--i >= lun_begin)
- clear_bit(i, dev->lun_map);
-
- return -EBUSY;
-}
-
-static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin,
- int lun_end)
-{
- int i;
-
- for (i = lun_begin; i <= lun_end; i++)
- WARN_ON(!test_and_clear_bit(i, dev->lun_map));
-}
-
-static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_dev_map *dev_map = tgt_dev->map;
- int i, j;
-
- for (i = 0; i < dev_map->num_ch; i++) {
- struct nvm_ch_map *ch_map = &dev_map->chnls[i];
- int *lun_offs = ch_map->lun_offs;
- int ch = i + ch_map->ch_off;
-
- if (clear) {
- for (j = 0; j < ch_map->num_lun; j++) {
- int lun = j + lun_offs[j];
- int lunid = (ch * dev->geo.num_lun) + lun;
-
- WARN_ON(!test_and_clear_bit(lunid,
- dev->lun_map));
- }
- }
-
- kfree(ch_map->lun_offs);
- }
-
- kfree(dev_map->chnls);
- kfree(dev_map);
-
- kfree(tgt_dev->luns);
- kfree(tgt_dev);
-}
-
-static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
- u16 lun_begin, u16 lun_end,
- u16 op)
-{
- struct nvm_tgt_dev *tgt_dev = NULL;
- struct nvm_dev_map *dev_rmap = dev->rmap;
- struct nvm_dev_map *dev_map;
- struct ppa_addr *luns;
- int num_lun = lun_end - lun_begin + 1;
- int luns_left = num_lun;
- int num_ch = num_lun / dev->geo.num_lun;
- int num_ch_mod = num_lun % dev->geo.num_lun;
- int bch = lun_begin / dev->geo.num_lun;
- int blun = lun_begin % dev->geo.num_lun;
- int lunid = 0;
- int lun_balanced = 1;
- int sec_per_lun, prev_num_lun;
- int i, j;
-
- num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1;
-
- dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
- if (!dev_map)
- goto err_dev;
-
- dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL);
- if (!dev_map->chnls)
- goto err_chnls;
-
- luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL);
- if (!luns)
- goto err_luns;
-
- prev_num_lun = (luns_left > dev->geo.num_lun) ?
- dev->geo.num_lun : luns_left;
- for (i = 0; i < num_ch; i++) {
- struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
- int *lun_roffs = ch_rmap->lun_offs;
- struct nvm_ch_map *ch_map = &dev_map->chnls[i];
- int *lun_offs;
- int luns_in_chnl = (luns_left > dev->geo.num_lun) ?
- dev->geo.num_lun : luns_left;
-
- if (lun_balanced && prev_num_lun != luns_in_chnl)
- lun_balanced = 0;
-
- ch_map->ch_off = ch_rmap->ch_off = bch;
- ch_map->num_lun = luns_in_chnl;
-
- lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
- if (!lun_offs)
- goto err_ch;
-
- for (j = 0; j < luns_in_chnl; j++) {
- luns[lunid].ppa = 0;
- luns[lunid].a.ch = i;
- luns[lunid++].a.lun = j;
-
- lun_offs[j] = blun;
- lun_roffs[j + blun] = blun;
- }
-
- ch_map->lun_offs = lun_offs;
-
- /* when starting a new channel, lun offset is reset */
- blun = 0;
- luns_left -= luns_in_chnl;
- }
-
- dev_map->num_ch = num_ch;
-
- tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
- if (!tgt_dev)
- goto err_ch;
-
- /* Inherit device geometry from parent */
- memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
-
- /* Target device only owns a portion of the physical device */
- tgt_dev->geo.num_ch = num_ch;
- tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1;
- tgt_dev->geo.all_luns = num_lun;
- tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk;
-
- tgt_dev->geo.op = op;
-
- sec_per_lun = dev->geo.clba * dev->geo.num_chk;
- tgt_dev->geo.total_secs = num_lun * sec_per_lun;
-
- tgt_dev->q = dev->q;
- tgt_dev->map = dev_map;
- tgt_dev->luns = luns;
- tgt_dev->parent = dev;
-
- return tgt_dev;
-err_ch:
- while (--i >= 0)
- kfree(dev_map->chnls[i].lun_offs);
- kfree(luns);
-err_luns:
- kfree(dev_map->chnls);
-err_chnls:
- kfree(dev_map);
-err_dev:
- return tgt_dev;
-}
-
-static struct nvm_tgt_type *__nvm_find_target_type(const char *name)
-{
- struct nvm_tgt_type *tt;
-
- list_for_each_entry(tt, &nvm_tgt_types, list)
- if (!strcmp(name, tt->name))
- return tt;
-
- return NULL;
-}
-
-static struct nvm_tgt_type *nvm_find_target_type(const char *name)
-{
- struct nvm_tgt_type *tt;
-
- down_write(&nvm_tgtt_lock);
- tt = __nvm_find_target_type(name);
- up_write(&nvm_tgtt_lock);
-
- return tt;
-}
-
-static int nvm_config_check_luns(struct nvm_geo *geo, int lun_begin,
- int lun_end)
-{
- if (lun_begin > lun_end || lun_end >= geo->all_luns) {
- pr_err("lun out of bound (%u:%u > %u)\n",
- lun_begin, lun_end, geo->all_luns - 1);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int __nvm_config_simple(struct nvm_dev *dev,
- struct nvm_ioctl_create_simple *s)
-{
- struct nvm_geo *geo = &dev->geo;
-
- if (s->lun_begin == -1 && s->lun_end == -1) {
- s->lun_begin = 0;
- s->lun_end = geo->all_luns - 1;
- }
-
- return nvm_config_check_luns(geo, s->lun_begin, s->lun_end);
-}
-
-static int __nvm_config_extended(struct nvm_dev *dev,
- struct nvm_ioctl_create_extended *e)
-{
- if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) {
- e->lun_begin = 0;
- e->lun_end = dev->geo.all_luns - 1;
- }
-
- /* op not set falls into target's default */
- if (e->op == 0xFFFF) {
- e->op = NVM_TARGET_DEFAULT_OP;
- } else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) {
- pr_err("invalid over provisioning value\n");
- return -EINVAL;
- }
-
- return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end);
-}
-
-static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
-{
- struct nvm_ioctl_create_extended e;
- struct gendisk *tdisk;
- struct nvm_tgt_type *tt;
- struct nvm_target *t;
- struct nvm_tgt_dev *tgt_dev;
- void *targetdata;
- unsigned int mdts;
- int ret;
-
- switch (create->conf.type) {
- case NVM_CONFIG_TYPE_SIMPLE:
- ret = __nvm_config_simple(dev, &create->conf.s);
- if (ret)
- return ret;
-
- e.lun_begin = create->conf.s.lun_begin;
- e.lun_end = create->conf.s.lun_end;
- e.op = NVM_TARGET_DEFAULT_OP;
- break;
- case NVM_CONFIG_TYPE_EXTENDED:
- ret = __nvm_config_extended(dev, &create->conf.e);
- if (ret)
- return ret;
-
- e = create->conf.e;
- break;
- default:
- pr_err("config type not valid\n");
- return -EINVAL;
- }
-
- tt = nvm_find_target_type(create->tgttype);
- if (!tt) {
- pr_err("target type %s not found\n", create->tgttype);
- return -EINVAL;
- }
-
- if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) {
- pr_err("device is incompatible with target L2P type.\n");
- return -EINVAL;
- }
-
- if (nvm_target_exists(create->tgtname)) {
- pr_err("target name already exists (%s)\n",
- create->tgtname);
- return -EINVAL;
- }
-
- ret = nvm_reserve_luns(dev, e.lun_begin, e.lun_end);
- if (ret)
- return ret;
-
- t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
- if (!t) {
- ret = -ENOMEM;
- goto err_reserve;
- }
-
- tgt_dev = nvm_create_tgt_dev(dev, e.lun_begin, e.lun_end, e.op);
- if (!tgt_dev) {
- pr_err("could not create target device\n");
- ret = -ENOMEM;
- goto err_t;
- }
-
- tdisk = blk_alloc_disk(dev->q->node);
- if (!tdisk) {
- ret = -ENOMEM;
- goto err_dev;
- }
-
- strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
- tdisk->major = 0;
- tdisk->first_minor = 0;
- tdisk->fops = tt->bops;
-
- targetdata = tt->init(tgt_dev, tdisk, create->flags);
- if (IS_ERR(targetdata)) {
- ret = PTR_ERR(targetdata);
- goto err_init;
- }
-
- tdisk->private_data = targetdata;
- tdisk->queue->queuedata = targetdata;
-
- mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
- if (dev->geo.mdts) {
- mdts = min_t(u32, dev->geo.mdts,
- (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
- }
- blk_queue_max_hw_sectors(tdisk->queue, mdts);
-
- set_capacity(tdisk, tt->capacity(targetdata));
- add_disk(tdisk);
-
- if (tt->sysfs_init && tt->sysfs_init(tdisk)) {
- ret = -ENOMEM;
- goto err_sysfs;
- }
-
- t->type = tt;
- t->disk = tdisk;
- t->dev = tgt_dev;
-
- mutex_lock(&dev->mlock);
- list_add_tail(&t->list, &dev->targets);
- mutex_unlock(&dev->mlock);
-
- __module_get(tt->owner);
-
- return 0;
-err_sysfs:
- if (tt->exit)
- tt->exit(targetdata, true);
-err_init:
- blk_cleanup_disk(tdisk);
-err_dev:
- nvm_remove_tgt_dev(tgt_dev, 0);
-err_t:
- kfree(t);
-err_reserve:
- nvm_release_luns_err(dev, e.lun_begin, e.lun_end);
- return ret;
-}
-
-static void __nvm_remove_target(struct nvm_target *t, bool graceful)
-{
- struct nvm_tgt_type *tt = t->type;
- struct gendisk *tdisk = t->disk;
-
- del_gendisk(tdisk);
-
- if (tt->sysfs_exit)
- tt->sysfs_exit(tdisk);
-
- if (tt->exit)
- tt->exit(tdisk->private_data, graceful);
-
- nvm_remove_tgt_dev(t->dev, 1);
- blk_cleanup_disk(tdisk);
- module_put(t->type->owner);
-
- list_del(&t->list);
- kfree(t);
-}
-
-/**
- * nvm_remove_tgt - Removes a target from the media manager
- * @remove: ioctl structure with target name to remove.
- *
- * Returns:
- * 0: on success
- * 1: on not found
- * <0: on error
- */
-static int nvm_remove_tgt(struct nvm_ioctl_remove *remove)
-{
- struct nvm_target *t = NULL;
- struct nvm_dev *dev;
-
- down_read(&nvm_lock);
- list_for_each_entry(dev, &nvm_devices, devices) {
- mutex_lock(&dev->mlock);
- t = nvm_find_target(dev, remove->tgtname);
- if (t) {
- mutex_unlock(&dev->mlock);
- break;
- }
- mutex_unlock(&dev->mlock);
- }
- up_read(&nvm_lock);
-
- if (!t) {
- pr_err("failed to remove target %s\n",
- remove->tgtname);
- return 1;
- }
-
- __nvm_remove_target(t, true);
- kref_put(&dev->ref, nvm_free);
-
- return 0;
-}
-
-static int nvm_register_map(struct nvm_dev *dev)
-{
- struct nvm_dev_map *rmap;
- int i, j;
-
- rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
- if (!rmap)
- goto err_rmap;
-
- rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map),
- GFP_KERNEL);
- if (!rmap->chnls)
- goto err_chnls;
-
- for (i = 0; i < dev->geo.num_ch; i++) {
- struct nvm_ch_map *ch_rmap;
- int *lun_roffs;
- int luns_in_chnl = dev->geo.num_lun;
-
- ch_rmap = &rmap->chnls[i];
-
- ch_rmap->ch_off = -1;
- ch_rmap->num_lun = luns_in_chnl;
-
- lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
- if (!lun_roffs)
- goto err_ch;
-
- for (j = 0; j < luns_in_chnl; j++)
- lun_roffs[j] = -1;
-
- ch_rmap->lun_offs = lun_roffs;
- }
-
- dev->rmap = rmap;
-
- return 0;
-err_ch:
- while (--i >= 0)
- kfree(rmap->chnls[i].lun_offs);
-err_chnls:
- kfree(rmap);
-err_rmap:
- return -ENOMEM;
-}
-
-static void nvm_unregister_map(struct nvm_dev *dev)
-{
- struct nvm_dev_map *rmap = dev->rmap;
- int i;
-
- for (i = 0; i < dev->geo.num_ch; i++)
- kfree(rmap->chnls[i].lun_offs);
-
- kfree(rmap->chnls);
- kfree(rmap);
-}
-
-static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
-{
- struct nvm_dev_map *dev_map = tgt_dev->map;
- struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch];
- int lun_off = ch_map->lun_offs[p->a.lun];
-
- p->a.ch += ch_map->ch_off;
- p->a.lun += lun_off;
-}
-
-static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_dev_map *dev_rmap = dev->rmap;
- struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch];
- int lun_roff = ch_rmap->lun_offs[p->a.lun];
-
- p->a.ch -= ch_rmap->ch_off;
- p->a.lun -= lun_roff;
-}
-
-static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
- struct ppa_addr *ppa_list, int nr_ppas)
-{
- int i;
-
- for (i = 0; i < nr_ppas; i++) {
- nvm_map_to_dev(tgt_dev, &ppa_list[i]);
- ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]);
- }
-}
-
-static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
- struct ppa_addr *ppa_list, int nr_ppas)
-{
- int i;
-
- for (i = 0; i < nr_ppas; i++) {
- ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]);
- nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
- }
-}
-
-static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas);
-}
-
-static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas);
-}
-
-int nvm_register_tgt_type(struct nvm_tgt_type *tt)
-{
- int ret = 0;
-
- down_write(&nvm_tgtt_lock);
- if (__nvm_find_target_type(tt->name))
- ret = -EEXIST;
- else
- list_add(&tt->list, &nvm_tgt_types);
- up_write(&nvm_tgtt_lock);
-
- return ret;
-}
-EXPORT_SYMBOL(nvm_register_tgt_type);
-
-void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
-{
- if (!tt)
- return;
-
- down_write(&nvm_tgtt_lock);
- list_del(&tt->list);
- up_write(&nvm_tgtt_lock);
-}
-EXPORT_SYMBOL(nvm_unregister_tgt_type);
-
-void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
- dma_addr_t *dma_handler)
-{
- return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
- dma_handler);
-}
-EXPORT_SYMBOL(nvm_dev_dma_alloc);
-
-void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
-{
- dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
-}
-EXPORT_SYMBOL(nvm_dev_dma_free);
-
-static struct nvm_dev *nvm_find_nvm_dev(const char *name)
-{
- struct nvm_dev *dev;
-
- list_for_each_entry(dev, &nvm_devices, devices)
- if (!strcmp(name, dev->name))
- return dev;
-
- return NULL;
-}
-
-static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
- const struct ppa_addr *ppas, int nr_ppas)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_geo *geo = &tgt_dev->geo;
- int i, plane_cnt, pl_idx;
- struct ppa_addr ppa;
-
- if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
- rqd->nr_ppas = nr_ppas;
- rqd->ppa_addr = ppas[0];
-
- return 0;
- }
-
- rqd->nr_ppas = nr_ppas;
- rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("failed to allocate dma memory\n");
- return -ENOMEM;
- }
-
- plane_cnt = geo->pln_mode;
- rqd->nr_ppas *= plane_cnt;
-
- for (i = 0; i < nr_ppas; i++) {
- for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
- ppa = ppas[i];
- ppa.g.pl = pl_idx;
- rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa;
- }
- }
-
- return 0;
-}
-
-static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
- struct nvm_rq *rqd)
-{
- if (!rqd->ppa_list)
- return;
-
- nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
-}
-
-static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
-{
- int flags = 0;
-
- if (geo->version == NVM_OCSSD_SPEC_20)
- return 0;
-
- if (rqd->is_seq)
- flags |= geo->pln_mode >> 1;
-
- if (rqd->opcode == NVM_OP_PREAD)
- flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND);
- else if (rqd->opcode == NVM_OP_PWRITE)
- flags |= NVM_IO_SCRAMBLE_ENABLE;
-
- return flags;
-}
-
-int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, void *buf)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- int ret;
-
- if (!dev->ops->submit_io)
- return -ENODEV;
-
- nvm_rq_tgt_to_dev(tgt_dev, rqd);
-
- rqd->dev = tgt_dev;
- rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
-
- /* In case of error, fail with right address format */
- ret = dev->ops->submit_io(dev, rqd, buf);
- if (ret)
- nvm_rq_dev_to_tgt(tgt_dev, rqd);
- return ret;
-}
-EXPORT_SYMBOL(nvm_submit_io);
-
-static void nvm_sync_end_io(struct nvm_rq *rqd)
-{
- struct completion *waiting = rqd->private;
-
- complete(waiting);
-}
-
-static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd,
- void *buf)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- int ret = 0;
-
- rqd->end_io = nvm_sync_end_io;
- rqd->private = &wait;
-
- ret = dev->ops->submit_io(dev, rqd, buf);
- if (ret)
- return ret;
-
- wait_for_completion_io(&wait);
-
- return 0;
-}
-
-int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
- void *buf)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- int ret;
-
- if (!dev->ops->submit_io)
- return -ENODEV;
-
- nvm_rq_tgt_to_dev(tgt_dev, rqd);
-
- rqd->dev = tgt_dev;
- rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
-
- ret = nvm_submit_io_wait(dev, rqd, buf);
-
- return ret;
-}
-EXPORT_SYMBOL(nvm_submit_io_sync);
-
-void nvm_end_io(struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *tgt_dev = rqd->dev;
-
- /* Convert address space */
- if (tgt_dev)
- nvm_rq_dev_to_tgt(tgt_dev, rqd);
-
- if (rqd->end_io)
- rqd->end_io(rqd);
-}
-EXPORT_SYMBOL(nvm_end_io);
-
-static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- if (!dev->ops->submit_io)
- return -ENODEV;
-
- rqd->dev = NULL;
- rqd->flags = nvm_set_flags(&dev->geo, rqd);
-
- return nvm_submit_io_wait(dev, rqd, NULL);
-}
-
-static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
-{
- struct nvm_rq rqd = { NULL };
- struct bio bio;
- struct bio_vec bio_vec;
- struct page *page;
- int ret;
-
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
-
- bio_init(&bio, &bio_vec, 1);
- bio_add_page(&bio, page, PAGE_SIZE, 0);
- bio_set_op_attrs(&bio, REQ_OP_READ, 0);
-
- rqd.bio = &bio;
- rqd.opcode = NVM_OP_PREAD;
- rqd.is_seq = 1;
- rqd.nr_ppas = 1;
- rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
-
- ret = nvm_submit_io_sync_raw(dev, &rqd);
- __free_page(page);
- if (ret)
- return ret;
-
- return rqd.error;
-}
-
-/*
- * Scans a 1.2 chunk first and last page to determine if its state.
- * If the chunk is found to be open, also scan it to update the write
- * pointer.
- */
-static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa,
- struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret, pg, pl;
-
- /* sense first page */
- ret = nvm_bb_chunk_sense(dev, ppa);
- if (ret < 0) /* io error */
- return ret;
- else if (ret == 0) /* valid data */
- meta->state = NVM_CHK_ST_OPEN;
- else if (ret > 0) {
- /*
- * If empty page, the chunk is free, else it is an
- * actual io error. In that case, mark it offline.
- */
- switch (ret) {
- case NVM_RSP_ERR_EMPTYPAGE:
- meta->state = NVM_CHK_ST_FREE;
- return 0;
- case NVM_RSP_ERR_FAILCRC:
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_WARN_HIGHECC:
- meta->state = NVM_CHK_ST_OPEN;
- goto scan;
- default:
- return -ret; /* other io error */
- }
- }
-
- /* sense last page */
- ppa.g.pg = geo->num_pg - 1;
- ppa.g.pl = geo->num_pln - 1;
-
- ret = nvm_bb_chunk_sense(dev, ppa);
- if (ret < 0) /* io error */
- return ret;
- else if (ret == 0) { /* Chunk fully written */
- meta->state = NVM_CHK_ST_CLOSED;
- meta->wp = geo->clba;
- return 0;
- } else if (ret > 0) {
- switch (ret) {
- case NVM_RSP_ERR_EMPTYPAGE:
- case NVM_RSP_ERR_FAILCRC:
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_WARN_HIGHECC:
- meta->state = NVM_CHK_ST_OPEN;
- break;
- default:
- return -ret; /* other io error */
- }
- }
-
-scan:
- /*
- * chunk is open, we scan sequentially to update the write pointer.
- * We make the assumption that targets write data across all planes
- * before moving to the next page.
- */
- for (pg = 0; pg < geo->num_pg; pg++) {
- for (pl = 0; pl < geo->num_pln; pl++) {
- ppa.g.pg = pg;
- ppa.g.pl = pl;
-
- ret = nvm_bb_chunk_sense(dev, ppa);
- if (ret < 0) /* io error */
- return ret;
- else if (ret == 0) {
- meta->wp += geo->ws_min;
- } else if (ret > 0) {
- switch (ret) {
- case NVM_RSP_ERR_EMPTYPAGE:
- return 0;
- case NVM_RSP_ERR_FAILCRC:
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_WARN_HIGHECC:
- meta->wp += geo->ws_min;
- break;
- default:
- return -ret; /* other io error */
- }
- }
- }
- }
-
- return 0;
-}
-
-/*
- * folds a bad block list from its plane representation to its
- * chunk representation.
- *
- * If any of the planes status are bad or grown bad, the chunk is marked
- * offline. If not bad, the first plane state acts as the chunk state.
- */
-static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
- u8 *blks, int nr_blks, struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret, blk, pl, offset, blktype;
-
- for (blk = 0; blk < geo->num_chk; blk++) {
- offset = blk * geo->pln_mode;
- blktype = blks[offset];
-
- for (pl = 0; pl < geo->pln_mode; pl++) {
- if (blks[offset + pl] &
- (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
- blktype = blks[offset + pl];
- break;
- }
- }
-
- ppa.g.blk = blk;
-
- meta->wp = 0;
- meta->type = NVM_CHK_TP_W_SEQ;
- meta->wi = 0;
- meta->slba = generic_to_dev_addr(dev, ppa).ppa;
- meta->cnlb = dev->geo.clba;
-
- if (blktype == NVM_BLK_T_FREE) {
- ret = nvm_bb_chunk_scan(dev, ppa, meta);
- if (ret)
- return ret;
- } else {
- meta->state = NVM_CHK_ST_OFFLINE;
- }
-
- meta++;
- }
-
- return 0;
-}
-
-static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba,
- int nchks, struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr ppa;
- u8 *blks;
- int ch, lun, nr_blks;
- int ret = 0;
-
- ppa.ppa = slba;
- ppa = dev_to_generic_addr(dev, ppa);
-
- if (ppa.g.blk != 0)
- return -EINVAL;
-
- if ((nchks % geo->num_chk) != 0)
- return -EINVAL;
-
- nr_blks = geo->num_chk * geo->pln_mode;
-
- blks = kmalloc(nr_blks, GFP_KERNEL);
- if (!blks)
- return -ENOMEM;
-
- for (ch = ppa.g.ch; ch < geo->num_ch; ch++) {
- for (lun = ppa.g.lun; lun < geo->num_lun; lun++) {
- struct ppa_addr ppa_gen, ppa_dev;
-
- if (!nchks)
- goto done;
-
- ppa_gen.ppa = 0;
- ppa_gen.g.ch = ch;
- ppa_gen.g.lun = lun;
- ppa_dev = generic_to_dev_addr(dev, ppa_gen);
-
- ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks);
- if (ret)
- goto done;
-
- ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks,
- meta);
- if (ret)
- goto done;
-
- meta += geo->num_chk;
- nchks -= geo->num_chk;
- }
- }
-done:
- kfree(blks);
- return ret;
-}
-
-int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
- int nchks, struct nvm_chk_meta *meta)
-{
- struct nvm_dev *dev = tgt_dev->parent;
-
- nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
-
- if (dev->geo.version == NVM_OCSSD_SPEC_12)
- return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta);
-
- return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta);
-}
-EXPORT_SYMBOL_GPL(nvm_get_chunk_meta);
-
-int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
- int nr_ppas, int type)
-{
- struct nvm_dev *dev = tgt_dev->parent;
- struct nvm_rq rqd;
- int ret;
-
- if (dev->geo.version == NVM_OCSSD_SPEC_20)
- return 0;
-
- if (nr_ppas > NVM_MAX_VLBA) {
- pr_err("unable to update all blocks atomically\n");
- return -EINVAL;
- }
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
- nvm_rq_tgt_to_dev(tgt_dev, &rqd);
-
- ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
- nvm_free_rqd_ppalist(tgt_dev, &rqd);
- if (ret)
- return -EINVAL;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(nvm_set_chunk_meta);
-
-static int nvm_core_init(struct nvm_dev *dev)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret;
-
- dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->lun_map)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&dev->area_list);
- INIT_LIST_HEAD(&dev->targets);
- mutex_init(&dev->mlock);
- spin_lock_init(&dev->lock);
-
- ret = nvm_register_map(dev);
- if (ret)
- goto err_fmtype;
-
- return 0;
-err_fmtype:
- kfree(dev->lun_map);
- return ret;
-}
-
-static void nvm_free(struct kref *ref)
-{
- struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref);
-
- if (dev->dma_pool)
- dev->ops->destroy_dma_pool(dev->dma_pool);
-
- if (dev->rmap)
- nvm_unregister_map(dev);
-
- kfree(dev->lun_map);
- kfree(dev);
-}
-
-static int nvm_init(struct nvm_dev *dev)
-{
- struct nvm_geo *geo = &dev->geo;
- int ret = -EINVAL;
-
- if (dev->ops->identity(dev)) {
- pr_err("device could not be identified\n");
- goto err;
- }
-
- pr_debug("ver:%u.%u nvm_vendor:%x\n", geo->major_ver_id,
- geo->minor_ver_id, geo->vmnt);
-
- ret = nvm_core_init(dev);
- if (ret) {
- pr_err("could not initialize core structures.\n");
- goto err;
- }
-
- pr_info("registered %s [%u/%u/%u/%u/%u]\n",
- dev->name, dev->geo.ws_min, dev->geo.ws_opt,
- dev->geo.num_chk, dev->geo.all_luns,
- dev->geo.num_ch);
- return 0;
-err:
- pr_err("failed to initialize nvm\n");
- return ret;
-}
-
-struct nvm_dev *nvm_alloc_dev(int node)
-{
- struct nvm_dev *dev;
-
- dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
- if (dev)
- kref_init(&dev->ref);
-
- return dev;
-}
-EXPORT_SYMBOL(nvm_alloc_dev);
-
-int nvm_register(struct nvm_dev *dev)
-{
- int ret, exp_pool_size;
-
- pr_warn_once("lightnvm support is deprecated and will be removed in Linux 5.15.\n");
-
- if (!dev->q || !dev->ops) {
- kref_put(&dev->ref, nvm_free);
- return -EINVAL;
- }
-
- ret = nvm_init(dev);
- if (ret) {
- kref_put(&dev->ref, nvm_free);
- return ret;
- }
-
- exp_pool_size = max_t(int, PAGE_SIZE,
- (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
- exp_pool_size = round_up(exp_pool_size, PAGE_SIZE);
-
- dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist",
- exp_pool_size);
- if (!dev->dma_pool) {
- pr_err("could not create dma pool\n");
- kref_put(&dev->ref, nvm_free);
- return -ENOMEM;
- }
-
- /* register device with a supported media manager */
- down_write(&nvm_lock);
- list_add(&dev->devices, &nvm_devices);
- up_write(&nvm_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(nvm_register);
-
-void nvm_unregister(struct nvm_dev *dev)
-{
- struct nvm_target *t, *tmp;
-
- mutex_lock(&dev->mlock);
- list_for_each_entry_safe(t, tmp, &dev->targets, list) {
- if (t->dev->parent != dev)
- continue;
- __nvm_remove_target(t, false);
- kref_put(&dev->ref, nvm_free);
- }
- mutex_unlock(&dev->mlock);
-
- down_write(&nvm_lock);
- list_del(&dev->devices);
- up_write(&nvm_lock);
-
- kref_put(&dev->ref, nvm_free);
-}
-EXPORT_SYMBOL(nvm_unregister);
-
-static int __nvm_configure_create(struct nvm_ioctl_create *create)
-{
- struct nvm_dev *dev;
- int ret;
-
- down_write(&nvm_lock);
- dev = nvm_find_nvm_dev(create->dev);
- up_write(&nvm_lock);
-
- if (!dev) {
- pr_err("device not found\n");
- return -EINVAL;
- }
-
- kref_get(&dev->ref);
- ret = nvm_create_tgt(dev, create);
- if (ret)
- kref_put(&dev->ref, nvm_free);
-
- return ret;
-}
-
-static long nvm_ioctl_info(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_info *info;
- struct nvm_tgt_type *tt;
- int tgt_iter = 0;
-
- info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
- if (IS_ERR(info))
- return PTR_ERR(info);
-
- info->version[0] = NVM_VERSION_MAJOR;
- info->version[1] = NVM_VERSION_MINOR;
- info->version[2] = NVM_VERSION_PATCH;
-
- down_write(&nvm_tgtt_lock);
- list_for_each_entry(tt, &nvm_tgt_types, list) {
- struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter];
-
- tgt->version[0] = tt->version[0];
- tgt->version[1] = tt->version[1];
- tgt->version[2] = tt->version[2];
- strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX);
-
- tgt_iter++;
- }
-
- info->tgtsize = tgt_iter;
- up_write(&nvm_tgtt_lock);
-
- if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) {
- kfree(info);
- return -EFAULT;
- }
-
- kfree(info);
- return 0;
-}
-
-static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_get_devices *devices;
- struct nvm_dev *dev;
- int i = 0;
-
- devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
- if (!devices)
- return -ENOMEM;
-
- down_write(&nvm_lock);
- list_for_each_entry(dev, &nvm_devices, devices) {
- struct nvm_ioctl_device_info *info = &devices->info[i];
-
- strlcpy(info->devname, dev->name, sizeof(info->devname));
-
- /* kept for compatibility */
- info->bmversion[0] = 1;
- info->bmversion[1] = 0;
- info->bmversion[2] = 0;
- strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
- i++;
-
- if (i >= ARRAY_SIZE(devices->info)) {
- pr_err("max %zd devices can be reported.\n",
- ARRAY_SIZE(devices->info));
- break;
- }
- }
- up_write(&nvm_lock);
-
- devices->nr_devices = i;
-
- if (copy_to_user(arg, devices,
- sizeof(struct nvm_ioctl_get_devices))) {
- kfree(devices);
- return -EFAULT;
- }
-
- kfree(devices);
- return 0;
-}
-
-static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_create create;
-
- if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create)))
- return -EFAULT;
-
- if (create.conf.type == NVM_CONFIG_TYPE_EXTENDED &&
- create.conf.e.rsv != 0) {
- pr_err("reserved config field in use\n");
- return -EINVAL;
- }
-
- create.dev[DISK_NAME_LEN - 1] = '\0';
- create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0';
- create.tgtname[DISK_NAME_LEN - 1] = '\0';
-
- if (create.flags != 0) {
- __u32 flags = create.flags;
-
- /* Check for valid flags */
- if (flags & NVM_TARGET_FACTORY)
- flags &= ~NVM_TARGET_FACTORY;
-
- if (flags) {
- pr_err("flag not supported\n");
- return -EINVAL;
- }
- }
-
- return __nvm_configure_create(&create);
-}
-
-static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_remove remove;
-
- if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
- return -EFAULT;
-
- remove.tgtname[DISK_NAME_LEN - 1] = '\0';
-
- if (remove.flags != 0) {
- pr_err("no flags supported\n");
- return -EINVAL;
- }
-
- return nvm_remove_tgt(&remove);
-}
-
-/* kept for compatibility reasons */
-static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_dev_init init;
-
- if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
- return -EFAULT;
-
- if (init.flags != 0) {
- pr_err("no flags supported\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* Kept for compatibility reasons */
-static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
-{
- struct nvm_ioctl_dev_factory fact;
-
- if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
- return -EFAULT;
-
- fact.dev[DISK_NAME_LEN - 1] = '\0';
-
- if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
- return -EINVAL;
-
- return 0;
-}
-
-static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case NVM_INFO:
- return nvm_ioctl_info(file, argp);
- case NVM_GET_DEVICES:
- return nvm_ioctl_get_devices(file, argp);
- case NVM_DEV_CREATE:
- return nvm_ioctl_dev_create(file, argp);
- case NVM_DEV_REMOVE:
- return nvm_ioctl_dev_remove(file, argp);
- case NVM_DEV_INIT:
- return nvm_ioctl_dev_init(file, argp);
- case NVM_DEV_FACTORY:
- return nvm_ioctl_dev_factory(file, argp);
- }
- return 0;
-}
-
-static const struct file_operations _ctl_fops = {
- .open = nonseekable_open,
- .unlocked_ioctl = nvm_ctl_ioctl,
- .owner = THIS_MODULE,
- .llseek = noop_llseek,
-};
-
-static struct miscdevice _nvm_misc = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "lightnvm",
- .nodename = "lightnvm/control",
- .fops = &_ctl_fops,
-};
-builtin_misc_device(_nvm_misc);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
deleted file mode 100644
index f185f1a00008..000000000000
--- a/drivers/lightnvm/pblk-cache.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-cache.c - pblk's write cache
- */
-
-#include "pblk.h"
-
-void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
- unsigned long flags)
-{
- struct pblk_w_ctx w_ctx;
- sector_t lba = pblk_get_lba(bio);
- unsigned long start_time;
- unsigned int bpos, pos;
- int nr_entries = pblk_get_secs(bio);
- int i, ret;
-
- start_time = bio_start_io_acct(bio);
-
- /* Update the write buffer head (mem) with the entries that we can
- * write. The write in itself cannot fail, so there is no need to
- * rollback from here on.
- */
-retry:
- ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- switch (ret) {
- case NVM_IO_REQUEUE:
- io_schedule();
- goto retry;
- case NVM_IO_ERR:
- pblk_pipeline_stop(pblk);
- bio_io_error(bio);
- goto out;
- }
-
- pblk_ppa_set_empty(&w_ctx.ppa);
- w_ctx.flags = flags;
- if (bio->bi_opf & REQ_PREFLUSH) {
- w_ctx.flags |= PBLK_FLUSH_ENTRY;
- pblk_write_kick(pblk);
- }
-
- if (unlikely(!bio_has_data(bio)))
- goto out;
-
- for (i = 0; i < nr_entries; i++) {
- void *data = bio_data(bio);
-
- w_ctx.lba = lba + i;
-
- pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
- pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
-
- bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
- }
-
- atomic64_add(nr_entries, &pblk->user_wa);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(nr_entries, &pblk->inflight_writes);
- atomic_long_add(nr_entries, &pblk->req_writes);
-#endif
-
- pblk_rl_inserted(&pblk->rl, nr_entries);
-
-out:
- bio_end_io_acct(bio, start_time);
- pblk_write_should_kick(pblk);
-
- if (ret == NVM_IO_DONE)
- bio_endio(bio);
-}
-
-/*
- * On GC the incoming lbas are not necessarily sequential. Also, some of the
- * lbas might not be valid entries, which are marked as empty by the GC thread
- */
-int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
-{
- struct pblk_w_ctx w_ctx;
- unsigned int bpos, pos;
- void *data = gc_rq->data;
- int i, valid_entries;
-
- /* Update the write buffer head (mem) with the entries that we can
- * write. The write in itself cannot fail, so there is no need to
- * rollback from here on.
- */
-retry:
- if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
- io_schedule();
- goto retry;
- }
-
- w_ctx.flags = PBLK_IOTYPE_GC;
- pblk_ppa_set_empty(&w_ctx.ppa);
-
- for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
- if (gc_rq->lba_list[i] == ADDR_EMPTY)
- continue;
-
- w_ctx.lba = gc_rq->lba_list[i];
-
- pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
- pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
- gc_rq->paddr_list[i], pos);
-
- data += PBLK_EXPOSED_PAGE_SIZE;
- valid_entries++;
- }
-
- WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
- "pblk: inconsistent GC write\n");
-
- atomic64_add(valid_entries, &pblk->gc_wa);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(valid_entries, &pblk->inflight_writes);
- atomic_long_add(valid_entries, &pblk->recov_gc_writes);
-#endif
-
- pblk_write_should_kick(pblk);
- return NVM_IO_OK;
-}
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
deleted file mode 100644
index 33d39d3dd343..000000000000
--- a/drivers/lightnvm/pblk-core.c
+++ /dev/null
@@ -1,2151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-core.c - pblk's core functionality
- *
- */
-
-#define CREATE_TRACE_POINTS
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-static void pblk_line_mark_bb(struct work_struct *work)
-{
- struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
- ws);
- struct pblk *pblk = line_ws->pblk;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct ppa_addr *ppa = line_ws->priv;
- int ret;
-
- ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
- if (ret) {
- struct pblk_line *line;
- int pos;
-
- line = pblk_ppa_to_line(pblk, *ppa);
- pos = pblk_ppa_to_pos(&dev->geo, *ppa);
-
- pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n",
- line->id, pos);
- }
-
- kfree(ppa);
- mempool_free(line_ws, &pblk->gen_ws_pool);
-}
-
-static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
- struct ppa_addr ppa_addr)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr *ppa;
- int pos = pblk_ppa_to_pos(geo, ppa_addr);
-
- pblk_debug(pblk, "erase failed: line:%d, pos:%d\n", line->id, pos);
- atomic_long_inc(&pblk->erase_failed);
-
- atomic_dec(&line->blk_in_line);
- if (test_and_set_bit(pos, line->blk_bitmap))
- pblk_err(pblk, "attempted to erase bb: line:%d, pos:%d\n",
- line->id, pos);
-
- /* Not necessary to mark bad blocks on 2.0 spec. */
- if (geo->version == NVM_OCSSD_SPEC_20)
- return;
-
- ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
- if (!ppa)
- return;
-
- *ppa = ppa_addr;
- pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
- GFP_ATOMIC, pblk->bb_wq);
-}
-
-static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_chk_meta *chunk;
- struct pblk_line *line;
- int pos;
-
- line = pblk_ppa_to_line(pblk, rqd->ppa_addr);
- pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
- chunk = &line->chks[pos];
-
- atomic_dec(&line->left_seblks);
-
- if (rqd->error) {
- trace_pblk_chunk_reset(pblk_disk_name(pblk),
- &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED);
-
- chunk->state = NVM_CHK_ST_OFFLINE;
- pblk_mark_bb(pblk, line, rqd->ppa_addr);
- } else {
- trace_pblk_chunk_reset(pblk_disk_name(pblk),
- &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE);
-
- chunk->state = NVM_CHK_ST_FREE;
- }
-
- trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr,
- chunk->state);
-
- atomic_dec(&pblk->inflight_io);
-}
-
-/* Erase completion assumes that only one block is erased at the time */
-static void pblk_end_io_erase(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
-
- __pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, &pblk->e_rq_pool);
-}
-
-/*
- * Get information for all chunks from the device.
- *
- * The caller is responsible for freeing (vmalloc) the returned structure
- */
-struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_chk_meta *meta;
- struct ppa_addr ppa;
- unsigned long len;
- int ret;
-
- ppa.ppa = 0;
-
- len = geo->all_chunks * sizeof(*meta);
- meta = vzalloc(len);
- if (!meta)
- return ERR_PTR(-ENOMEM);
-
- ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta);
- if (ret) {
- vfree(meta);
- return ERR_PTR(-EIO);
- }
-
- return meta;
-}
-
-struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
- struct nvm_chk_meta *meta,
- struct ppa_addr ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun;
- int lun_off = ppa.m.pu * geo->num_chk;
- int chk_off = ppa.m.chk;
-
- return meta + ch_off + lun_off + chk_off;
-}
-
-void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list = NULL;
-
- /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
- * table is modified with reclaimed sectors, a check is done to endure
- * that newer updates are not overwritten.
- */
- spin_lock(&line->lock);
- WARN_ON(line->state == PBLK_LINESTATE_FREE);
-
- if (test_and_set_bit(paddr, line->invalid_bitmap)) {
- WARN_ONCE(1, "pblk: double invalidate\n");
- spin_unlock(&line->lock);
- return;
- }
- le32_add_cpu(line->vsc, -1);
-
- if (line->state == PBLK_LINESTATE_CLOSED)
- move_list = pblk_line_gc_list(pblk, line);
- spin_unlock(&line->lock);
-
- if (move_list) {
- spin_lock(&l_mg->gc_lock);
- spin_lock(&line->lock);
- /* Prevent moving a line that has just been chosen for GC */
- if (line->state == PBLK_LINESTATE_GC) {
- spin_unlock(&line->lock);
- spin_unlock(&l_mg->gc_lock);
- return;
- }
- spin_unlock(&line->lock);
-
- list_move_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
- }
-}
-
-void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct pblk_line *line;
- u64 paddr;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a device address */
- BUG_ON(pblk_addr_in_cache(ppa));
- BUG_ON(pblk_ppa_empty(ppa));
-#endif
-
- line = pblk_ppa_to_line(pblk, ppa);
- paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
-
- __pblk_map_invalidate(pblk, line, paddr);
-}
-
-static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
- unsigned int nr_secs)
-{
- sector_t lba;
-
- spin_lock(&pblk->trans_lock);
- for (lba = slba; lba < slba + nr_secs; lba++) {
- struct ppa_addr ppa;
-
- ppa = pblk_trans_map_get(pblk, lba);
-
- if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
- pblk_map_invalidate(pblk, ppa);
-
- pblk_ppa_set_empty(&ppa);
- pblk_trans_map_set(pblk, lba, ppa);
- }
- spin_unlock(&pblk->trans_lock);
-}
-
-int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_meta_list);
- if (!rqd->meta_list)
- return -ENOMEM;
-
- if (rqd->nr_ppas == 1)
- return 0;
-
- rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
- rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
-
- return 0;
-}
-
-void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- if (rqd->meta_list)
- nvm_dev_dma_free(dev->parent, rqd->meta_list,
- rqd->dma_meta_list);
-}
-
-/* Caller must guarantee that the request is a valid type */
-struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
-{
- mempool_t *pool;
- struct nvm_rq *rqd;
- int rq_size;
-
- switch (type) {
- case PBLK_WRITE:
- case PBLK_WRITE_INT:
- pool = &pblk->w_rq_pool;
- rq_size = pblk_w_rq_size;
- break;
- case PBLK_READ:
- pool = &pblk->r_rq_pool;
- rq_size = pblk_g_rq_size;
- break;
- default:
- pool = &pblk->e_rq_pool;
- rq_size = pblk_g_rq_size;
- }
-
- rqd = mempool_alloc(pool, GFP_KERNEL);
- memset(rqd, 0, rq_size);
-
- return rqd;
-}
-
-/* Typically used on completion path. Cannot guarantee request consistency */
-void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
-{
- mempool_t *pool;
-
- switch (type) {
- case PBLK_WRITE:
- kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
- fallthrough;
- case PBLK_WRITE_INT:
- pool = &pblk->w_rq_pool;
- break;
- case PBLK_READ:
- pool = &pblk->r_rq_pool;
- break;
- case PBLK_ERASE:
- pool = &pblk->e_rq_pool;
- break;
- default:
- pblk_err(pblk, "trying to free unknown rqd type\n");
- return;
- }
-
- pblk_free_rqd_meta(pblk, rqd);
- mempool_free(rqd, pool);
-}
-
-void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
- int nr_pages)
-{
- struct bio_vec *bv;
- struct page *page;
- int i, e, nbv = 0;
-
- for (i = 0; i < bio->bi_vcnt; i++) {
- bv = &bio->bi_io_vec[i];
- page = bv->bv_page;
- for (e = 0; e < bv->bv_len; e += PBLK_EXPOSED_PAGE_SIZE, nbv++)
- if (nbv >= off)
- mempool_free(page++, &pblk->page_bio_pool);
- }
-}
-
-int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
- int nr_pages)
-{
- struct request_queue *q = pblk->dev->q;
- struct page *page;
- int i, ret;
-
- for (i = 0; i < nr_pages; i++) {
- page = mempool_alloc(&pblk->page_bio_pool, flags);
-
- ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
- if (ret != PBLK_EXPOSED_PAGE_SIZE) {
- pblk_err(pblk, "could not add page to bio\n");
- mempool_free(page, &pblk->page_bio_pool);
- goto err;
- }
- }
-
- return 0;
-err:
- pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
- return -1;
-}
-
-void pblk_write_kick(struct pblk *pblk)
-{
- wake_up_process(pblk->writer_ts);
- mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
-}
-
-void pblk_write_timer_fn(struct timer_list *t)
-{
- struct pblk *pblk = from_timer(pblk, t, wtimer);
-
- /* kick the write thread every tick to flush outstanding data */
- pblk_write_kick(pblk);
-}
-
-void pblk_write_should_kick(struct pblk *pblk)
-{
- unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
-
- if (secs_avail >= pblk->min_write_pgs_data)
- pblk_write_kick(pblk);
-}
-
-static void pblk_wait_for_meta(struct pblk *pblk)
-{
- do {
- if (!atomic_read(&pblk->inflight_io))
- break;
-
- schedule();
- } while (1);
-}
-
-static void pblk_flush_writer(struct pblk *pblk)
-{
- pblk_rb_flush(&pblk->rwb);
- do {
- if (!pblk_rb_sync_count(&pblk->rwb))
- break;
-
- pblk_write_kick(pblk);
- schedule();
- } while (1);
-}
-
-struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list = NULL;
- int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
- * (pblk->min_write_pgs - pblk->min_write_pgs_data);
- int vsc = le32_to_cpu(*line->vsc) + packed_meta;
-
- lockdep_assert_held(&line->lock);
-
- if (line->w_err_gc->has_write_err) {
- if (line->gc_group != PBLK_LINEGC_WERR) {
- line->gc_group = PBLK_LINEGC_WERR;
- move_list = &l_mg->gc_werr_list;
- pblk_rl_werr_line_in(&pblk->rl);
- }
- } else if (!vsc) {
- if (line->gc_group != PBLK_LINEGC_FULL) {
- line->gc_group = PBLK_LINEGC_FULL;
- move_list = &l_mg->gc_full_list;
- }
- } else if (vsc < lm->high_thrs) {
- if (line->gc_group != PBLK_LINEGC_HIGH) {
- line->gc_group = PBLK_LINEGC_HIGH;
- move_list = &l_mg->gc_high_list;
- }
- } else if (vsc < lm->mid_thrs) {
- if (line->gc_group != PBLK_LINEGC_MID) {
- line->gc_group = PBLK_LINEGC_MID;
- move_list = &l_mg->gc_mid_list;
- }
- } else if (vsc < line->sec_in_line) {
- if (line->gc_group != PBLK_LINEGC_LOW) {
- line->gc_group = PBLK_LINEGC_LOW;
- move_list = &l_mg->gc_low_list;
- }
- } else if (vsc == line->sec_in_line) {
- if (line->gc_group != PBLK_LINEGC_EMPTY) {
- line->gc_group = PBLK_LINEGC_EMPTY;
- move_list = &l_mg->gc_empty_list;
- }
- } else {
- line->state = PBLK_LINESTATE_CORRUPT;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- line->gc_group = PBLK_LINEGC_NONE;
- move_list = &l_mg->corrupt_list;
- pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, vsc,
- line->sec_in_line,
- lm->high_thrs, lm->mid_thrs);
- }
-
- return move_list;
-}
-
-void pblk_discard(struct pblk *pblk, struct bio *bio)
-{
- sector_t slba = pblk_get_lba(bio);
- sector_t nr_secs = pblk_get_secs(bio);
-
- pblk_invalidate_range(pblk, slba, nr_secs);
-}
-
-void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
-{
- atomic_long_inc(&pblk->write_failed);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_print_failed_rqd(pblk, rqd, rqd->error);
-#endif
-}
-
-void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
-{
- /* Empty page read is not necessarily an error (e.g., L2P recovery) */
- if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
- atomic_long_inc(&pblk->read_empty);
- return;
- }
-
- switch (rqd->error) {
- case NVM_RSP_WARN_HIGHECC:
- atomic_long_inc(&pblk->read_high_ecc);
- break;
- case NVM_RSP_ERR_FAILECC:
- case NVM_RSP_ERR_FAILCRC:
- atomic_long_inc(&pblk->read_failed);
- break;
- default:
- pblk_err(pblk, "unknown read error:%d\n", rqd->error);
- }
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_print_failed_rqd(pblk, rqd, rqd->error);
-#endif
-}
-
-void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
-{
- pblk->sec_per_write = sec_per_write;
-}
-
-int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- atomic_inc(&pblk->inflight_io);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if (pblk_check_io(pblk, rqd))
- return NVM_IO_ERR;
-#endif
-
- return nvm_submit_io(dev, rqd, buf);
-}
-
-void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- struct ppa_addr *ppa = &ppa_list[i];
- struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa);
- u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa);
-
- if (caddr == 0)
- trace_pblk_chunk_state(pblk_disk_name(pblk),
- ppa, NVM_CHK_ST_OPEN);
- else if (caddr == (chunk->cnlb - 1))
- trace_pblk_chunk_state(pblk_disk_name(pblk),
- ppa, NVM_CHK_ST_CLOSED);
- }
-}
-
-int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- int ret;
-
- atomic_inc(&pblk->inflight_io);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if (pblk_check_io(pblk, rqd))
- return NVM_IO_ERR;
-#endif
-
- ret = nvm_submit_io_sync(dev, rqd, buf);
-
- if (trace_pblk_chunk_state_enabled() && !ret &&
- rqd->opcode == NVM_OP_PWRITE)
- pblk_check_chunk_state_update(pblk, rqd);
-
- return ret;
-}
-
-static int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd,
- void *buf)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- int ret;
-
- pblk_down_chunk(pblk, ppa_list[0]);
- ret = pblk_submit_io_sync(pblk, rqd, buf);
- pblk_up_chunk(pblk, ppa_list[0]);
-
- return ret;
-}
-
-int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush, bool skip_meta)
-{
- int max = pblk->sec_per_write;
- int min = pblk->min_write_pgs;
- int secs_to_sync = 0;
-
- if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
- min = max = pblk->min_write_pgs_data;
-
- if (secs_avail >= max)
- secs_to_sync = max;
- else if (secs_avail >= min)
- secs_to_sync = min * (secs_avail / min);
- else if (secs_to_flush)
- secs_to_sync = min;
-
- return secs_to_sync;
-}
-
-void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
-{
- u64 addr;
- int i;
-
- spin_lock(&line->lock);
- addr = find_next_zero_bit(line->map_bitmap,
- pblk->lm.sec_per_line, line->cur_sec);
- line->cur_sec = addr - nr_secs;
-
- for (i = 0; i < nr_secs; i++, line->cur_sec--)
- WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
- spin_unlock(&line->lock);
-}
-
-u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
-{
- u64 addr;
- int i;
-
- lockdep_assert_held(&line->lock);
-
- /* logic error: ppa out-of-bounds. Prevent generating bad address */
- if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
- WARN(1, "pblk: page allocation out of bounds\n");
- nr_secs = pblk->lm.sec_per_line - line->cur_sec;
- }
-
- line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
- pblk->lm.sec_per_line, line->cur_sec);
- for (i = 0; i < nr_secs; i++, line->cur_sec++)
- WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
-
- return addr;
-}
-
-u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
-{
- u64 addr;
-
- /* Lock needed in case a write fails and a recovery needs to remap
- * failed write buffer entries
- */
- spin_lock(&line->lock);
- addr = __pblk_alloc_page(pblk, line, nr_secs);
- line->left_msecs -= nr_secs;
- WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
- spin_unlock(&line->lock);
-
- return addr;
-}
-
-u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
-{
- u64 paddr;
-
- spin_lock(&line->lock);
- paddr = find_next_zero_bit(line->map_bitmap,
- pblk->lm.sec_per_line, line->cur_sec);
- spin_unlock(&line->lock);
-
- return paddr;
-}
-
-u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- int bit;
-
- /* This usually only happens on bad lines */
- bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- if (bit >= lm->blk_per_line)
- return -1;
-
- return bit * geo->ws_opt;
-}
-
-int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct ppa_addr *ppa_list;
- struct nvm_rq rqd;
- u64 paddr = pblk_line_smeta_start(pblk, line);
- int i, ret;
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- ret = pblk_alloc_rqd_meta(pblk, &rqd);
- if (ret)
- return ret;
-
- rqd.opcode = NVM_OP_PREAD;
- rqd.nr_ppas = lm->smeta_sec;
- rqd.is_seq = 1;
- ppa_list = nvm_rq_to_ppa_list(&rqd);
-
- for (i = 0; i < lm->smeta_sec; i++, paddr++)
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-
- ret = pblk_submit_io_sync(pblk, &rqd, line->smeta);
- if (ret) {
- pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
- goto clear_rqd;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
- pblk_log_read_err(pblk, &rqd);
- ret = -EIO;
- }
-
-clear_rqd:
- pblk_free_rqd_meta(pblk, &rqd);
- return ret;
-}
-
-static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct ppa_addr *ppa_list;
- struct nvm_rq rqd;
- __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- int i, ret;
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- ret = pblk_alloc_rqd_meta(pblk, &rqd);
- if (ret)
- return ret;
-
- rqd.opcode = NVM_OP_PWRITE;
- rqd.nr_ppas = lm->smeta_sec;
- rqd.is_seq = 1;
- ppa_list = nvm_rq_to_ppa_list(&rqd);
-
- for (i = 0; i < lm->smeta_sec; i++, paddr++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk,
- rqd.meta_list, i);
-
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- meta->lba = lba_list[paddr] = addr_empty;
- }
-
- ret = pblk_submit_io_sync_sem(pblk, &rqd, line->smeta);
- if (ret) {
- pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
- goto clear_rqd;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error) {
- pblk_log_write_err(pblk, &rqd);
- ret = -EIO;
- }
-
-clear_rqd:
- pblk_free_rqd_meta(pblk, &rqd);
- return ret;
-}
-
-int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
- void *emeta_buf)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- void *ppa_list_buf, *meta_list;
- struct ppa_addr *ppa_list;
- struct nvm_rq rqd;
- u64 paddr = line->emeta_ssec;
- dma_addr_t dma_ppa_list, dma_meta_list;
- int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec[0];
- int line_id = line->id;
- int rq_ppas, rq_len;
- int i, j;
- int ret;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &dma_meta_list);
- if (!meta_list)
- return -ENOMEM;
-
- ppa_list_buf = meta_list + pblk_dma_meta_size(pblk);
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
-
-next_rq:
- memset(&rqd, 0, sizeof(struct nvm_rq));
-
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
- rq_len = rq_ppas * geo->csecs;
-
- rqd.meta_list = meta_list;
- rqd.ppa_list = ppa_list_buf;
- rqd.dma_meta_list = dma_meta_list;
- rqd.dma_ppa_list = dma_ppa_list;
- rqd.opcode = NVM_OP_PREAD;
- rqd.nr_ppas = rq_ppas;
- ppa_list = nvm_rq_to_ppa_list(&rqd);
-
- for (i = 0; i < rqd.nr_ppas; ) {
- struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- if (pblk_io_aligned(pblk, rq_ppas))
- rqd.is_seq = 1;
-
- while (test_bit(pos, line->blk_bitmap)) {
- paddr += min;
- if (pblk_boundary_paddr_checks(pblk, paddr)) {
- ret = -EINTR;
- goto free_rqd_dma;
- }
-
- ppa = addr_to_gen_ppa(pblk, paddr, line_id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
- ret = -EINTR;
- goto free_rqd_dma;
- }
-
- for (j = 0; j < min; j++, i++, paddr++)
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
- }
-
- ret = pblk_submit_io_sync(pblk, &rqd, emeta_buf);
- if (ret) {
- pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
- goto free_rqd_dma;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
- pblk_log_read_err(pblk, &rqd);
- ret = -EIO;
- goto free_rqd_dma;
- }
-
- emeta_buf += rq_len;
- left_ppas -= rq_ppas;
- if (left_ppas)
- goto next_rq;
-
-free_rqd_dma:
- nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
- return ret;
-}
-
-static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct ppa_addr ppa)
-{
- rqd->opcode = NVM_OP_ERASE;
- rqd->ppa_addr = ppa;
- rqd->nr_ppas = 1;
- rqd->is_seq = 1;
- rqd->bio = NULL;
-}
-
-static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_rq rqd = {NULL};
- int ret;
-
- trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa,
- PBLK_CHUNK_RESET_START);
-
- pblk_setup_e_rq(pblk, &rqd, ppa);
-
- /* The write thread schedules erases so that it minimizes disturbances
- * with writes. Thus, there is no need to take the LUN semaphore.
- */
- ret = pblk_submit_io_sync(pblk, &rqd, NULL);
- rqd.private = pblk;
- __pblk_end_io_erase(pblk, &rqd);
-
- return ret;
-}
-
-int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct ppa_addr ppa;
- int ret, bit = -1;
-
- /* Erase only good blocks, one at a time */
- do {
- spin_lock(&line->lock);
- bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
- bit + 1);
- if (bit >= lm->blk_per_line) {
- spin_unlock(&line->lock);
- break;
- }
-
- ppa = pblk->luns[bit].bppa; /* set ch and lun */
- ppa.a.blk = line->id;
-
- atomic_dec(&line->left_eblks);
- WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
- spin_unlock(&line->lock);
-
- ret = pblk_blk_erase_sync(pblk, ppa);
- if (ret) {
- pblk_err(pblk, "failed to erase line %d\n", line->id);
- return ret;
- }
- } while (1);
-
- return 0;
-}
-
-static void pblk_line_setup_metadata(struct pblk_line *line,
- struct pblk_line_mgmt *l_mg,
- struct pblk_line_meta *lm)
-{
- int meta_line;
-
- lockdep_assert_held(&l_mg->free_lock);
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
- }
-
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->meta_line = meta_line;
-
- line->smeta = l_mg->sline_meta[meta_line];
- line->emeta = l_mg->eline_meta[meta_line];
-
- memset(line->smeta, 0, lm->smeta_len);
- memset(line->emeta->buf, 0, lm->emeta_len[0]);
-
- line->emeta->mem = 0;
- atomic_set(&line->emeta->sync, 0);
-}
-
-/* For now lines are always assumed full lines. Thus, smeta former and current
- * lun bitmaps are omitted.
- */
-static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
- struct pblk_line *cur)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_emeta *emeta = line->emeta;
- struct line_emeta *emeta_buf = emeta->buf;
- struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
- int nr_blk_line;
-
- /* After erasing the line, new bad blocks might appear and we risk
- * having an invalid line
- */
- nr_blk_line = lm->blk_per_line -
- bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- if (nr_blk_line < lm->min_blk_line) {
- spin_lock(&l_mg->free_lock);
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_BAD;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_add_tail(&line->list, &l_mg->bad_list);
- spin_unlock(&l_mg->free_lock);
-
- pblk_debug(pblk, "line %d is bad\n", line->id);
-
- return 0;
- }
-
- /* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
-
- /* Mark LUNs allocated in this line (all for now) */
- bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
-
- smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
- export_guid(smeta_buf->header.uuid, &pblk->instance_uuid);
- smeta_buf->header.id = cpu_to_le32(line->id);
- smeta_buf->header.type = cpu_to_le16(line->type);
- smeta_buf->header.version_major = SMETA_VERSION_MAJOR;
- smeta_buf->header.version_minor = SMETA_VERSION_MINOR;
-
- /* Start metadata */
- smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
- smeta_buf->window_wr_lun = cpu_to_le32(geo->all_luns);
-
- /* Fill metadata among lines */
- if (cur) {
- memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta_buf->prev_id = cpu_to_le32(cur->id);
- cur->emeta->buf->next_id = cpu_to_le32(line->id);
- } else {
- smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
- }
-
- /* All smeta must be set at this point */
- smeta_buf->header.crc = cpu_to_le32(
- pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
- smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
-
- /* End metadata */
- memcpy(&emeta_buf->header, &smeta_buf->header,
- sizeof(struct line_header));
-
- emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
- emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
- emeta_buf->header.crc = cpu_to_le32(
- pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
-
- emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
- emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta_buf->nr_valid_lbas = cpu_to_le64(0);
- emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta_buf->crc = cpu_to_le32(0);
- emeta_buf->prev_id = smeta_buf->prev_id;
-
- return 1;
-}
-
-static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
- if (!line->map_bitmap)
- return -ENOMEM;
-
- memset(line->map_bitmap, 0, lm->sec_bitmap_len);
-
- /* will be initialized using bb info from map_bitmap */
- line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
- if (!line->invalid_bitmap) {
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/* For now lines are always assumed full lines. Thus, smeta former and current
- * lun bitmaps are omitted.
- */
-static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
- int init)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- u64 off;
- int bit = -1;
- int emeta_secs;
-
- line->sec_in_line = lm->sec_per_line;
-
- /* Capture bad block information on line mapping bitmaps */
- while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
- bit + 1)) < lm->blk_per_line) {
- off = bit * geo->ws_opt;
- bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
- lm->sec_per_line);
- bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
- lm->sec_per_line);
- line->sec_in_line -= geo->clba;
- }
-
- /* Mark smeta metadata sectors as bad sectors */
- bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- off = bit * geo->ws_opt;
- bitmap_set(line->map_bitmap, off, lm->smeta_sec);
- line->sec_in_line -= lm->smeta_sec;
- line->cur_sec = off + lm->smeta_sec;
-
- if (init && pblk_line_smeta_write(pblk, line, off)) {
- pblk_debug(pblk, "line smeta I/O failed. Retry\n");
- return 0;
- }
-
- bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
-
- /* Mark emeta metadata sectors as bad sectors. We need to consider bad
- * blocks to make sure that there are enough sectors to store emeta
- */
- emeta_secs = lm->emeta_sec[0];
- off = lm->sec_per_line;
- while (emeta_secs) {
- off -= geo->ws_opt;
- if (!test_bit(off, line->invalid_bitmap)) {
- bitmap_set(line->invalid_bitmap, off, geo->ws_opt);
- emeta_secs -= geo->ws_opt;
- }
- }
-
- line->emeta_ssec = off;
- line->sec_in_line -= lm->emeta_sec[0];
- line->nr_valid_lbas = 0;
- line->left_msecs = line->sec_in_line;
- *line->vsc = cpu_to_le32(line->sec_in_line);
-
- if (lm->sec_per_line - line->sec_in_line !=
- bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_BAD;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_add_tail(&line->list, &l_mg->bad_list);
- pblk_err(pblk, "unexpected line %d is bad\n", line->id);
-
- return 0;
- }
-
- return 1;
-}
-
-static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int blk_to_erase = atomic_read(&line->blk_in_line);
- int i;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct pblk_lun *rlun = &pblk->luns[i];
- int pos = pblk_ppa_to_pos(geo, rlun->bppa);
- int state = line->chks[pos].state;
-
- /* Free chunks should not be erased */
- if (state & NVM_CHK_ST_FREE) {
- set_bit(pblk_ppa_to_pos(geo, rlun->bppa),
- line->erase_bitmap);
- blk_to_erase--;
- }
- }
-
- return blk_to_erase;
-}
-
-static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- int blk_in_line = atomic_read(&line->blk_in_line);
- int blk_to_erase;
-
- /* Bad blocks do not need to be erased */
- bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
-
- spin_lock(&line->lock);
-
- /* If we have not written to this line, we need to mark up free chunks
- * as already erased
- */
- if (line->state == PBLK_LINESTATE_NEW) {
- blk_to_erase = pblk_prepare_new_line(pblk, line);
- line->state = PBLK_LINESTATE_FREE;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- } else {
- blk_to_erase = blk_in_line;
- }
-
- if (blk_in_line < lm->min_blk_line) {
- spin_unlock(&line->lock);
- return -EAGAIN;
- }
-
- if (line->state != PBLK_LINESTATE_FREE) {
- WARN(1, "pblk: corrupted line %d, state %d\n",
- line->id, line->state);
- spin_unlock(&line->lock);
- return -EINTR;
- }
-
- line->state = PBLK_LINESTATE_OPEN;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- atomic_set(&line->left_eblks, blk_to_erase);
- atomic_set(&line->left_seblks, blk_to_erase);
-
- line->meta_distance = lm->meta_distance;
- spin_unlock(&line->lock);
-
- kref_init(&line->ref);
- atomic_set(&line->sec_to_update, 0);
-
- return 0;
-}
-
-/* Line allocations in the recovery path are always single threaded */
-int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int ret;
-
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = line;
- list_del(&line->list);
-
- ret = pblk_line_prepare(pblk, line);
- if (ret) {
- list_add(&line->list, &l_mg->free_list);
- spin_unlock(&l_mg->free_lock);
- return ret;
- }
- spin_unlock(&l_mg->free_lock);
-
- ret = pblk_line_alloc_bitmaps(pblk, line);
- if (ret)
- goto fail;
-
- if (!pblk_line_init_bb(pblk, line, 0)) {
- ret = -EINTR;
- goto fail;
- }
-
- pblk_rl_free_lines_dec(&pblk->rl, line, true);
- return 0;
-
-fail:
- spin_lock(&l_mg->free_lock);
- list_add(&line->list, &l_mg->free_list);
- spin_unlock(&l_mg->free_lock);
-
- return ret;
-}
-
-void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-}
-
-static void pblk_line_reinit(struct pblk_line *line)
-{
- *line->vsc = cpu_to_le32(EMPTY_ENTRY);
-
- line->map_bitmap = NULL;
- line->invalid_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-}
-
-void pblk_line_free(struct pblk_line *line)
-{
- struct pblk *pblk = line->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- mempool_free(line->invalid_bitmap, l_mg->bitmap_pool);
-
- pblk_line_reinit(line);
-}
-
-struct pblk_line *pblk_line_get(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line;
- int ret, bit;
-
- lockdep_assert_held(&l_mg->free_lock);
-
-retry:
- if (list_empty(&l_mg->free_list)) {
- pblk_err(pblk, "no free lines\n");
- return NULL;
- }
-
- line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
- list_del(&line->list);
- l_mg->nr_free_lines--;
-
- bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- if (unlikely(bit >= lm->blk_per_line)) {
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_BAD;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_add_tail(&line->list, &l_mg->bad_list);
-
- pblk_debug(pblk, "line %d is bad\n", line->id);
- goto retry;
- }
-
- ret = pblk_line_prepare(pblk, line);
- if (ret) {
- switch (ret) {
- case -EAGAIN:
- list_add(&line->list, &l_mg->bad_list);
- goto retry;
- case -EINTR:
- list_add(&line->list, &l_mg->corrupt_list);
- goto retry;
- default:
- pblk_err(pblk, "failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- l_mg->nr_free_lines++;
- return NULL;
- }
- }
-
- return line;
-}
-
-static struct pblk_line *pblk_line_retry(struct pblk *pblk,
- struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *retry_line;
-
-retry:
- spin_lock(&l_mg->free_lock);
- retry_line = pblk_line_get(pblk);
- if (!retry_line) {
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
- retry_line->map_bitmap = line->map_bitmap;
- retry_line->invalid_bitmap = line->invalid_bitmap;
- retry_line->smeta = line->smeta;
- retry_line->emeta = line->emeta;
- retry_line->meta_line = line->meta_line;
-
- pblk_line_reinit(line);
-
- l_mg->data_line = retry_line;
- spin_unlock(&l_mg->free_lock);
-
- pblk_rl_free_lines_dec(&pblk->rl, line, false);
-
- if (pblk_line_erase(pblk, retry_line))
- goto retry;
-
- return retry_line;
-}
-
-static void pblk_set_space_limit(struct pblk *pblk)
-{
- struct pblk_rl *rl = &pblk->rl;
-
- atomic_set(&rl->rb_space, 0);
-}
-
-struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
-
- spin_lock(&l_mg->free_lock);
- line = pblk_line_get(pblk);
- if (!line) {
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
- line->seq_nr = l_mg->d_seq_nr++;
- line->type = PBLK_LINETYPE_DATA;
- l_mg->data_line = line;
-
- pblk_line_setup_metadata(line, l_mg, &pblk->lm);
-
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (!l_mg->data_next) {
- /* If we cannot get a new line, we need to stop the pipeline.
- * Only allow as many writes in as we can store safely and then
- * fail gracefully
- */
- pblk_set_space_limit(pblk);
-
- l_mg->data_next = NULL;
- } else {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- }
- spin_unlock(&l_mg->free_lock);
-
- if (pblk_line_alloc_bitmaps(pblk, line))
- return NULL;
-
- if (pblk_line_erase(pblk, line)) {
- line = pblk_line_retry(pblk, line);
- if (!line)
- return NULL;
- }
-
-retry_setup:
- if (!pblk_line_init_metadata(pblk, line, NULL)) {
- line = pblk_line_retry(pblk, line);
- if (!line)
- return NULL;
-
- goto retry_setup;
- }
-
- if (!pblk_line_init_bb(pblk, line, 1)) {
- line = pblk_line_retry(pblk, line);
- if (!line)
- return NULL;
-
- goto retry_setup;
- }
-
- pblk_rl_free_lines_dec(&pblk->rl, line, true);
-
- return line;
-}
-
-void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct pblk_line *line;
-
- line = pblk_ppa_to_line(pblk, ppa);
- kref_put(&line->ref, pblk_line_put_wq);
-}
-
-void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++)
- pblk_ppa_to_line_put(pblk, ppa_list[i]);
-}
-
-static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
-{
- lockdep_assert_held(&pblk->l_mg.free_lock);
-
- pblk_set_space_limit(pblk);
- pblk->state = PBLK_STATE_STOPPING;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
-}
-
-static void pblk_line_close_meta_sync(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line, *tline;
- LIST_HEAD(list);
-
- spin_lock(&l_mg->close_lock);
- if (list_empty(&l_mg->emeta_list)) {
- spin_unlock(&l_mg->close_lock);
- return;
- }
-
- list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
- spin_unlock(&l_mg->close_lock);
-
- list_for_each_entry_safe(line, tline, &list, list) {
- struct pblk_emeta *emeta = line->emeta;
-
- while (emeta->mem < lm->emeta_len[0]) {
- int ret;
-
- ret = pblk_submit_meta_io(pblk, line);
- if (ret) {
- pblk_err(pblk, "sync meta line %d failed (%d)\n",
- line->id, ret);
- return;
- }
- }
- }
-
- pblk_wait_for_meta(pblk);
- flush_workqueue(pblk->close_wq);
-}
-
-void __pblk_pipeline_flush(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int ret;
-
- spin_lock(&l_mg->free_lock);
- if (pblk->state == PBLK_STATE_RECOVERING ||
- pblk->state == PBLK_STATE_STOPPED) {
- spin_unlock(&l_mg->free_lock);
- return;
- }
- pblk->state = PBLK_STATE_RECOVERING;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
- spin_unlock(&l_mg->free_lock);
-
- pblk_flush_writer(pblk);
- pblk_wait_for_meta(pblk);
-
- ret = pblk_recov_pad(pblk);
- if (ret) {
- pblk_err(pblk, "could not close data on teardown(%d)\n", ret);
- return;
- }
-
- flush_workqueue(pblk->bb_wq);
- pblk_line_close_meta_sync(pblk);
-}
-
-void __pblk_pipeline_stop(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-
- spin_lock(&l_mg->free_lock);
- pblk->state = PBLK_STATE_STOPPED;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
- l_mg->data_line = NULL;
- l_mg->data_next = NULL;
- spin_unlock(&l_mg->free_lock);
-}
-
-void pblk_pipeline_stop(struct pblk *pblk)
-{
- __pblk_pipeline_flush(pblk);
- __pblk_pipeline_stop(pblk);
-}
-
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *cur, *new = NULL;
- unsigned int left_seblks;
-
- new = l_mg->data_next;
- if (!new)
- goto out;
-
- spin_lock(&l_mg->free_lock);
- cur = l_mg->data_line;
- l_mg->data_line = new;
-
- pblk_line_setup_metadata(new, l_mg, &pblk->lm);
- spin_unlock(&l_mg->free_lock);
-
-retry_erase:
- left_seblks = atomic_read(&new->left_seblks);
- if (left_seblks) {
- /* If line is not fully erased, erase it */
- if (atomic_read(&new->left_eblks)) {
- if (pblk_line_erase(pblk, new))
- goto out;
- } else {
- io_schedule();
- }
- goto retry_erase;
- }
-
- if (pblk_line_alloc_bitmaps(pblk, new))
- return NULL;
-
-retry_setup:
- if (!pblk_line_init_metadata(pblk, new, cur)) {
- new = pblk_line_retry(pblk, new);
- if (!new)
- goto out;
-
- goto retry_setup;
- }
-
- if (!pblk_line_init_bb(pblk, new, 1)) {
- new = pblk_line_retry(pblk, new);
- if (!new)
- goto out;
-
- goto retry_setup;
- }
-
- pblk_rl_free_lines_dec(&pblk->rl, new, true);
-
- /* Allocate next line for preparation */
- spin_lock(&l_mg->free_lock);
- l_mg->data_next = pblk_line_get(pblk);
- if (!l_mg->data_next) {
- /* If we cannot get a new line, we need to stop the pipeline.
- * Only allow as many writes in as we can store safely and then
- * fail gracefully
- */
- pblk_stop_writes(pblk, new);
- l_mg->data_next = NULL;
- } else {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- }
- spin_unlock(&l_mg->free_lock);
-
-out:
- return new;
-}
-
-static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_GC);
- if (line->w_err_gc->has_gc_err) {
- spin_unlock(&line->lock);
- pblk_err(pblk, "line %d had errors during GC\n", line->id);
- pblk_put_line_back(pblk, line);
- line->w_err_gc->has_gc_err = 0;
- return;
- }
-
- line->state = PBLK_LINESTATE_FREE;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- line->gc_group = PBLK_LINEGC_NONE;
- pblk_line_free(line);
-
- if (line->w_err_gc->has_write_err) {
- pblk_rl_werr_line_out(&pblk->rl);
- line->w_err_gc->has_write_err = 0;
- }
-
- spin_unlock(&line->lock);
- atomic_dec(&gc->pipeline_gc);
-
- spin_lock(&l_mg->free_lock);
- list_add_tail(&line->list, &l_mg->free_list);
- l_mg->nr_free_lines++;
- spin_unlock(&l_mg->free_lock);
-
- pblk_rl_free_lines_inc(&pblk->rl, line);
-}
-
-static void pblk_line_put_ws(struct work_struct *work)
-{
- struct pblk_line_ws *line_put_ws = container_of(work,
- struct pblk_line_ws, ws);
- struct pblk *pblk = line_put_ws->pblk;
- struct pblk_line *line = line_put_ws->line;
-
- __pblk_line_put(pblk, line);
- mempool_free(line_put_ws, &pblk->gen_ws_pool);
-}
-
-void pblk_line_put(struct kref *ref)
-{
- struct pblk_line *line = container_of(ref, struct pblk_line, ref);
- struct pblk *pblk = line->pblk;
-
- __pblk_line_put(pblk, line);
-}
-
-void pblk_line_put_wq(struct kref *ref)
-{
- struct pblk_line *line = container_of(ref, struct pblk_line, ref);
- struct pblk *pblk = line->pblk;
- struct pblk_line_ws *line_put_ws;
-
- line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
- if (!line_put_ws)
- return;
-
- line_put_ws->pblk = pblk;
- line_put_ws->line = line;
- line_put_ws->priv = NULL;
-
- INIT_WORK(&line_put_ws->ws, pblk_line_put_ws);
- queue_work(pblk->r_end_wq, &line_put_ws->ws);
-}
-
-int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_rq *rqd;
- int err;
-
- rqd = pblk_alloc_rqd(pblk, PBLK_ERASE);
-
- pblk_setup_e_rq(pblk, rqd, ppa);
-
- rqd->end_io = pblk_end_io_erase;
- rqd->private = pblk;
-
- trace_pblk_chunk_reset(pblk_disk_name(pblk),
- &ppa, PBLK_CHUNK_RESET_START);
-
- /* The write thread schedules erases so that it minimizes disturbances
- * with writes. Thus, there is no need to take the LUN semaphore.
- */
- err = pblk_submit_io(pblk, rqd, NULL);
- if (err) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
-
- pblk_err(pblk, "could not async erase line:%d,blk:%d\n",
- pblk_ppa_to_line_id(ppa),
- pblk_ppa_to_pos(geo, ppa));
- }
-
- return err;
-}
-
-struct pblk_line *pblk_line_get_data(struct pblk *pblk)
-{
- return pblk->l_mg.data_line;
-}
-
-/* For now, always erase next line */
-struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
-{
- return pblk->l_mg.data_next;
-}
-
-int pblk_line_is_full(struct pblk_line *line)
-{
- return (line->left_msecs == 0);
-}
-
-static void pblk_line_should_sync_meta(struct pblk *pblk)
-{
- if (pblk_rl_is_limit(&pblk->rl))
- pblk_line_close_meta_sync(pblk);
-}
-
-void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list;
- int i;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
- "pblk: corrupt closed line %d\n", line->id);
-#endif
-
- spin_lock(&l_mg->free_lock);
- WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
- spin_unlock(&l_mg->free_lock);
-
- spin_lock(&l_mg->gc_lock);
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_OPEN);
- line->state = PBLK_LINESTATE_CLOSED;
- move_list = pblk_line_gc_list(pblk, line);
- list_add_tail(&line->list, move_list);
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct pblk_lun *rlun = &pblk->luns[i];
- int pos = pblk_ppa_to_pos(geo, rlun->bppa);
- int state = line->chks[pos].state;
-
- if (!(state & NVM_CHK_ST_OFFLINE))
- state = NVM_CHK_ST_CLOSED;
- }
-
- spin_unlock(&line->lock);
- spin_unlock(&l_mg->gc_lock);
-
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-}
-
-void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_emeta *emeta = line->emeta;
- struct line_emeta *emeta_buf = emeta->buf;
- struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);
-
- /* No need for exact vsc value; avoid a big line lock and take aprox. */
- memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
- memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
-
- wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
- wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
- wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
-
- if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) {
- emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
- export_guid(emeta_buf->header.uuid, &pblk->instance_uuid);
- emeta_buf->header.id = cpu_to_le32(line->id);
- emeta_buf->header.type = cpu_to_le16(line->type);
- emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
- emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
- emeta_buf->header.crc = cpu_to_le32(
- pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
- }
-
- emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
- emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
-
- spin_lock(&l_mg->close_lock);
- spin_lock(&line->lock);
-
- /* Update the in-memory start address for emeta, in case it has
- * shifted due to write errors
- */
- if (line->emeta_ssec != line->cur_sec)
- line->emeta_ssec = line->cur_sec;
-
- list_add_tail(&line->list, &l_mg->emeta_list);
- spin_unlock(&line->lock);
- spin_unlock(&l_mg->close_lock);
-
- pblk_line_should_sync_meta(pblk);
-}
-
-static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int lba_list_size = lm->emeta_len[2];
- struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
- struct pblk_emeta *emeta = line->emeta;
-
- w_err_gc->lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
- memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
- lba_list_size);
-}
-
-void pblk_line_close_ws(struct work_struct *work)
-{
- struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
- ws);
- struct pblk *pblk = line_ws->pblk;
- struct pblk_line *line = line_ws->line;
- struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
-
- /* Write errors makes the emeta start address stored in smeta invalid,
- * so keep a copy of the lba list until we've gc'd the line
- */
- if (w_err_gc->has_write_err)
- pblk_save_lba_list(pblk, line);
-
- pblk_line_close(pblk, line);
- mempool_free(line_ws, &pblk->gen_ws_pool);
-}
-
-void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *), gfp_t gfp_mask,
- struct workqueue_struct *wq)
-{
- struct pblk_line_ws *line_ws;
-
- line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);
- if (!line_ws) {
- pblk_err(pblk, "pblk: could not allocate memory\n");
- return;
- }
-
- line_ws->pblk = pblk;
- line_ws->line = line;
- line_ws->priv = priv;
-
- INIT_WORK(&line_ws->ws, work);
- queue_work(wq, &line_ws->ws);
-}
-
-static void __pblk_down_chunk(struct pblk *pblk, int pos)
-{
- struct pblk_lun *rlun = &pblk->luns[pos];
- int ret;
-
- /*
- * Only send one inflight I/O per LUN. Since we map at a page
- * granurality, all ppas in the I/O will map to the same LUN
- */
-
- ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
- if (ret == -ETIME || ret == -EINTR)
- pblk_err(pblk, "taking lun semaphore timed out: err %d\n",
- -ret);
-}
-
-void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- __pblk_down_chunk(pblk, pos);
-}
-
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
- unsigned long *lun_bitmap)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- /* If the LUN has been locked for this same request, do no attempt to
- * lock it again
- */
- if (test_and_set_bit(pos, lun_bitmap))
- return;
-
- __pblk_down_chunk(pblk, pos);
-}
-
-void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int pos = pblk_ppa_to_pos(geo, ppa);
-
- rlun = &pblk->luns[pos];
- up(&rlun->wr_sem);
-}
-
-void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int num_lun = geo->all_luns;
- int bit = -1;
-
- while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) {
- rlun = &pblk->luns[bit];
- up(&rlun->wr_sem);
- }
-}
-
-void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
-{
- struct ppa_addr ppa_l2p;
-
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- return;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
-
- if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p))
- pblk_map_invalidate(pblk, ppa_l2p);
-
- pblk_trans_map_set(pblk, lba, ppa);
- spin_unlock(&pblk->trans_lock);
-}
-
-void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
-{
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a cache address */
- BUG_ON(!pblk_addr_in_cache(ppa));
- BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
-#endif
-
- pblk_update_map(pblk, lba, ppa);
-}
-
-int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
- struct pblk_line *gc_line, u64 paddr_gc)
-{
- struct ppa_addr ppa_l2p, ppa_gc;
- int ret = 1;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a cache address */
- BUG_ON(!pblk_addr_in_cache(ppa_new));
- BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new)));
-#endif
-
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- return 0;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
- ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id);
-
- if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) {
- spin_lock(&gc_line->lock);
- WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap),
- "pblk: corrupted GC update");
- spin_unlock(&gc_line->lock);
-
- ret = 0;
- goto out;
- }
-
- pblk_trans_map_set(pblk, lba, ppa_new);
-out:
- spin_unlock(&pblk->trans_lock);
- return ret;
-}
-
-void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache)
-{
- struct ppa_addr ppa_l2p;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Callers must ensure that the ppa points to a device address */
- BUG_ON(pblk_addr_in_cache(ppa_mapped));
-#endif
- /* Invalidate and discard padded entries */
- if (lba == ADDR_EMPTY) {
- atomic64_inc(&pblk->pad_wa);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->padded_wb);
-#endif
- if (!pblk_ppa_empty(ppa_mapped))
- pblk_map_invalidate(pblk, ppa_mapped);
- return;
- }
-
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- return;
- }
-
- spin_lock(&pblk->trans_lock);
- ppa_l2p = pblk_trans_map_get(pblk, lba);
-
- /* Do not update L2P if the cacheline has been updated. In this case,
- * the mapped ppa must be invalidated
- */
- if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) {
- if (!pblk_ppa_empty(ppa_mapped))
- pblk_map_invalidate(pblk, ppa_mapped);
- goto out;
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p));
-#endif
-
- pblk_trans_map_set(pblk, lba, ppa_mapped);
-out:
- spin_unlock(&pblk->trans_lock);
-}
-
-int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
- sector_t blba, int nr_secs, bool *from_cache)
-{
- int i;
-
- spin_lock(&pblk->trans_lock);
- for (i = 0; i < nr_secs; i++) {
- struct ppa_addr ppa;
-
- ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i);
-
- /* If the L2P entry maps to a line, the reference is valid */
- if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
- struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);
-
- if (i > 0 && *from_cache)
- break;
- *from_cache = false;
-
- kref_get(&line->ref);
- } else {
- if (i > 0 && !*from_cache)
- break;
- *from_cache = true;
- }
- }
- spin_unlock(&pblk->trans_lock);
- return i;
-}
-
-void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
- u64 *lba_list, int nr_secs)
-{
- u64 lba;
- int i;
-
- spin_lock(&pblk->trans_lock);
- for (i = 0; i < nr_secs; i++) {
- lba = lba_list[i];
- if (lba != ADDR_EMPTY) {
- /* logic error: lba out-of-bounds. Ignore update */
- if (!(lba < pblk->capacity)) {
- WARN(1, "pblk: corrupted L2P map request\n");
- continue;
- }
- ppas[i] = pblk_trans_map_get(pblk, lba);
- }
- }
- spin_unlock(&pblk->trans_lock);
-}
-
-void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd)
-{
- void *buffer;
-
- if (pblk_is_oob_meta_supported(pblk)) {
- /* Just use OOB metadata buffer as always */
- buffer = rqd->meta_list;
- } else {
- /* We need to reuse last page of request (packed metadata)
- * in similar way as traditional oob metadata
- */
- buffer = page_to_virt(
- rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
- }
-
- return buffer;
-}
-
-void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
-{
- void *meta_list = rqd->meta_list;
- void *page;
- int i = 0;
-
- if (pblk_is_oob_meta_supported(pblk))
- return;
-
- page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
- /* We need to fill oob meta buffer with data from packed metadata */
- for (; i < rqd->nr_ppas; i++)
- memcpy(pblk_get_meta(pblk, meta_list, i),
- page + (i * sizeof(struct pblk_sec_meta)),
- sizeof(struct pblk_sec_meta));
-}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
deleted file mode 100644
index b31658be35a7..000000000000
--- a/drivers/lightnvm/pblk-gc.c
+++ /dev/null
@@ -1,726 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-gc.c - pblk's garbage collector
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-#include <linux/delay.h>
-
-
-static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
-{
- vfree(gc_rq->data);
- kfree(gc_rq);
-}
-
-static int pblk_gc_write(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq, *tgc_rq;
- LIST_HEAD(w_list);
-
- spin_lock(&gc->w_lock);
- if (list_empty(&gc->w_list)) {
- spin_unlock(&gc->w_lock);
- return 1;
- }
-
- list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
- gc->w_entries = 0;
- spin_unlock(&gc->w_lock);
-
- list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
- pblk_write_gc_to_cache(pblk, gc_rq);
- list_del(&gc_rq->list);
- kref_put(&gc_rq->line->ref, pblk_line_put);
- pblk_gc_free_gc_rq(gc_rq);
- }
-
- return 0;
-}
-
-static void pblk_gc_writer_kick(struct pblk_gc *gc)
-{
- wake_up_process(gc->gc_writer_ts);
-}
-
-void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct list_head *move_list;
-
- spin_lock(&l_mg->gc_lock);
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_GC);
- line->state = PBLK_LINESTATE_CLOSED;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- /* We need to reset gc_group in order to ensure that
- * pblk_line_gc_list will return proper move_list
- * since right now current line is not on any of the
- * gc lists.
- */
- line->gc_group = PBLK_LINEGC_NONE;
- move_list = pblk_line_gc_list(pblk, line);
- spin_unlock(&line->lock);
- list_add_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
-}
-
-static void pblk_gc_line_ws(struct work_struct *work)
-{
- struct pblk_line_ws *gc_rq_ws = container_of(work,
- struct pblk_line_ws, ws);
- struct pblk *pblk = gc_rq_ws->pblk;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line = gc_rq_ws->line;
- struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
- int ret;
-
- up(&gc->gc_sem);
-
- /* Read from GC victim block */
- ret = pblk_submit_read_gc(pblk, gc_rq);
- if (ret) {
- line->w_err_gc->has_gc_err = 1;
- goto out;
- }
-
- if (!gc_rq->secs_to_gc)
- goto out;
-
-retry:
- spin_lock(&gc->w_lock);
- if (gc->w_entries >= PBLK_GC_RQ_QD) {
- spin_unlock(&gc->w_lock);
- pblk_gc_writer_kick(&pblk->gc);
- usleep_range(128, 256);
- goto retry;
- }
- gc->w_entries++;
- list_add_tail(&gc_rq->list, &gc->w_list);
- spin_unlock(&gc->w_lock);
-
- pblk_gc_writer_kick(&pblk->gc);
-
- kfree(gc_rq_ws);
- return;
-
-out:
- pblk_gc_free_gc_rq(gc_rq);
- kref_put(&line->ref, pblk_line_put);
- kfree(gc_rq_ws);
-}
-
-static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
- struct pblk_line *line)
-{
- struct line_emeta *emeta_buf;
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int lba_list_size = lm->emeta_len[2];
- __le64 *lba_list;
- int ret;
-
- emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
- if (!emeta_buf)
- return NULL;
-
- ret = pblk_line_emeta_read(pblk, line, emeta_buf);
- if (ret) {
- pblk_err(pblk, "line %d read emeta failed (%d)\n",
- line->id, ret);
- kvfree(emeta_buf);
- return NULL;
- }
-
- /* If this read fails, it means that emeta is corrupted.
- * For now, leave the line untouched.
- * TODO: Implement a recovery routine that scans and moves
- * all sectors on the line.
- */
-
- ret = pblk_recov_check_emeta(pblk, emeta_buf);
- if (ret) {
- pblk_err(pblk, "inconsistent emeta (line %d)\n",
- line->id);
- kvfree(emeta_buf);
- return NULL;
- }
-
- lba_list = kvmalloc(lba_list_size, GFP_KERNEL);
-
- if (lba_list)
- memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
-
- kvfree(emeta_buf);
-
- return lba_list;
-}
-
-static void pblk_gc_line_prepare_ws(struct work_struct *work)
-{
- struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
- ws);
- struct pblk *pblk = line_ws->pblk;
- struct pblk_line *line = line_ws->line;
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line_ws *gc_rq_ws;
- struct pblk_gc_rq *gc_rq;
- __le64 *lba_list;
- unsigned long *invalid_bitmap;
- int sec_left, nr_secs, bit;
-
- invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!invalid_bitmap)
- goto fail_free_ws;
-
- if (line->w_err_gc->has_write_err) {
- lba_list = line->w_err_gc->lba_list;
- line->w_err_gc->lba_list = NULL;
- } else {
- lba_list = get_lba_list_from_emeta(pblk, line);
- if (!lba_list) {
- pblk_err(pblk, "could not interpret emeta (line %d)\n",
- line->id);
- goto fail_free_invalid_bitmap;
- }
- }
-
- spin_lock(&line->lock);
- bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
- sec_left = pblk_line_vsc(line);
- spin_unlock(&line->lock);
-
- if (sec_left < 0) {
- pblk_err(pblk, "corrupted GC line (%d)\n", line->id);
- goto fail_free_lba_list;
- }
-
- bit = -1;
-next_rq:
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq)
- goto fail_free_lba_list;
-
- nr_secs = 0;
- do {
- bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
- bit + 1);
- if (bit > line->emeta_ssec)
- break;
-
- gc_rq->paddr_list[nr_secs] = bit;
- gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
- } while (nr_secs < pblk->max_write_pgs);
-
- if (unlikely(!nr_secs)) {
- kfree(gc_rq);
- goto out;
- }
-
- gc_rq->nr_secs = nr_secs;
- gc_rq->line = line;
-
- gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
- if (!gc_rq->data)
- goto fail_free_gc_rq;
-
- gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
- if (!gc_rq_ws)
- goto fail_free_gc_data;
-
- gc_rq_ws->pblk = pblk;
- gc_rq_ws->line = line;
- gc_rq_ws->priv = gc_rq;
-
- /* The write GC path can be much slower than the read GC one due to
- * the budget imposed by the rate-limiter. Balance in case that we get
- * back pressure from the write GC path.
- */
- while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000)))
- io_schedule();
-
- kref_get(&line->ref);
-
- INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
- queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);
-
- sec_left -= nr_secs;
- if (sec_left > 0)
- goto next_rq;
-
-out:
- kvfree(lba_list);
- kfree(line_ws);
- kfree(invalid_bitmap);
-
- kref_put(&line->ref, pblk_line_put);
- atomic_dec(&gc->read_inflight_gc);
-
- return;
-
-fail_free_gc_data:
- vfree(gc_rq->data);
-fail_free_gc_rq:
- kfree(gc_rq);
-fail_free_lba_list:
- kvfree(lba_list);
-fail_free_invalid_bitmap:
- kfree(invalid_bitmap);
-fail_free_ws:
- kfree(line_ws);
-
- /* Line goes back to closed state, so we cannot release additional
- * reference for line, since we do that only when we want to do
- * gc to free line state transition.
- */
- pblk_put_line_back(pblk, line);
- atomic_dec(&gc->read_inflight_gc);
-
- pblk_err(pblk, "failed to GC line %d\n", line->id);
-}
-
-static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line_ws *line_ws;
-
- pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id);
-
- line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
- if (!line_ws)
- return -ENOMEM;
-
- line_ws->pblk = pblk;
- line_ws->line = line;
-
- atomic_inc(&gc->pipeline_gc);
- INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
- queue_work(gc->gc_reader_wq, &line_ws->ws);
-
- return 0;
-}
-
-static void pblk_gc_reader_kick(struct pblk_gc *gc)
-{
- wake_up_process(gc->gc_reader_ts);
-}
-
-static void pblk_gc_kick(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- pblk_gc_writer_kick(gc);
- pblk_gc_reader_kick(gc);
-
- /* If we're shutting down GC, let's not start it up again */
- if (gc->gc_enabled) {
- wake_up_process(gc->gc_ts);
- mod_timer(&gc->gc_timer,
- jiffies + msecs_to_jiffies(GC_TIME_MSECS));
- }
-}
-
-static int pblk_gc_read(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line;
-
- spin_lock(&gc->r_lock);
- if (list_empty(&gc->r_list)) {
- spin_unlock(&gc->r_lock);
- return 1;
- }
-
- line = list_first_entry(&gc->r_list, struct pblk_line, list);
- list_del(&line->list);
- spin_unlock(&gc->r_lock);
-
- pblk_gc_kick(pblk);
-
- if (pblk_gc_line(pblk, line)) {
- pblk_err(pblk, "failed to GC line %d\n", line->id);
- /* rollback */
- spin_lock(&gc->r_lock);
- list_add_tail(&line->list, &gc->r_list);
- spin_unlock(&gc->r_lock);
- }
-
- return 0;
-}
-
-static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
- struct list_head *group_list)
-{
- struct pblk_line *line, *victim;
- unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L;
-
- victim = list_first_entry(group_list, struct pblk_line, list);
-
- list_for_each_entry(line, group_list, list) {
- if (!atomic_read(&line->sec_to_update))
- line_vsc = le32_to_cpu(*line->vsc);
- if (line_vsc < victim_vsc) {
- victim = line;
- victim_vsc = le32_to_cpu(*victim->vsc);
- }
- }
-
- if (victim_vsc == ~0x0)
- return NULL;
-
- return victim;
-}
-
-static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
-{
- unsigned int nr_blocks_free, nr_blocks_need;
- unsigned int werr_lines = atomic_read(&rl->werr_lines);
-
- nr_blocks_need = pblk_rl_high_thrs(rl);
- nr_blocks_free = pblk_rl_nr_free_blks(rl);
-
- /* This is not critical, no need to take lock here */
- return ((werr_lines > 0) ||
- ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
-}
-
-void pblk_gc_free_full_lines(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line;
-
- do {
- spin_lock(&l_mg->gc_lock);
- if (list_empty(&l_mg->gc_full_list)) {
- spin_unlock(&l_mg->gc_lock);
- return;
- }
-
- line = list_first_entry(&l_mg->gc_full_list,
- struct pblk_line, list);
-
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
- line->state = PBLK_LINESTATE_GC;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_del(&line->list);
- spin_unlock(&l_mg->gc_lock);
-
- atomic_inc(&gc->pipeline_gc);
- kref_put(&line->ref, pblk_line_put);
- } while (1);
-}
-
-/*
- * Lines with no valid sectors will be returned to the free list immediately. If
- * GC is activated - either because the free block count is under the determined
- * threshold, or because it is being forced from user space - only lines with a
- * high count of invalid sectors will be recycled.
- */
-static void pblk_gc_run(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line;
- struct list_head *group_list;
- bool run_gc;
- int read_inflight_gc, gc_group = 0, prev_group = 0;
-
- pblk_gc_free_full_lines(pblk);
-
- run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
- if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD))
- return;
-
-next_gc_group:
- group_list = l_mg->gc_lists[gc_group++];
-
- do {
- spin_lock(&l_mg->gc_lock);
-
- line = pblk_gc_get_victim_line(pblk, group_list);
- if (!line) {
- spin_unlock(&l_mg->gc_lock);
- break;
- }
-
- spin_lock(&line->lock);
- WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
- line->state = PBLK_LINESTATE_GC;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- spin_unlock(&line->lock);
-
- list_del(&line->list);
- spin_unlock(&l_mg->gc_lock);
-
- spin_lock(&gc->r_lock);
- list_add_tail(&line->list, &gc->r_list);
- spin_unlock(&gc->r_lock);
-
- read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc);
- pblk_gc_reader_kick(gc);
-
- prev_group = 1;
-
- /* No need to queue up more GC lines than we can handle */
- run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
- if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD)
- break;
- } while (1);
-
- if (!prev_group && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_GC_NR_LISTS)
- goto next_gc_group;
-}
-
-static void pblk_gc_timer(struct timer_list *t)
-{
- struct pblk *pblk = from_timer(pblk, t, gc.gc_timer);
-
- pblk_gc_kick(pblk);
-}
-
-static int pblk_gc_ts(void *data)
-{
- struct pblk *pblk = data;
-
- while (!kthread_should_stop()) {
- pblk_gc_run(pblk);
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
- return 0;
-}
-
-static int pblk_gc_writer_ts(void *data)
-{
- struct pblk *pblk = data;
-
- while (!kthread_should_stop()) {
- if (!pblk_gc_write(pblk))
- continue;
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
- return 0;
-}
-
-static int pblk_gc_reader_ts(void *data)
-{
- struct pblk *pblk = data;
- struct pblk_gc *gc = &pblk->gc;
-
- while (!kthread_should_stop()) {
- if (!pblk_gc_read(pblk))
- continue;
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_info(pblk, "flushing gc pipeline, %d lines left\n",
- atomic_read(&gc->pipeline_gc));
-#endif
-
- do {
- if (!atomic_read(&gc->pipeline_gc))
- break;
-
- schedule();
- } while (1);
-
- return 0;
-}
-
-static void pblk_gc_start(struct pblk *pblk)
-{
- pblk->gc.gc_active = 1;
- pblk_debug(pblk, "gc start\n");
-}
-
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- if (gc->gc_enabled && !gc->gc_active) {
- pblk_gc_start(pblk);
- pblk_gc_kick(pblk);
- }
-}
-
-void pblk_gc_should_stop(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- if (gc->gc_active && !gc->gc_forced)
- gc->gc_active = 0;
-}
-
-void pblk_gc_should_kick(struct pblk *pblk)
-{
- pblk_rl_update_rates(&pblk->rl);
-}
-
-void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
- int *gc_active)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- *gc_enabled = gc->gc_enabled;
- *gc_active = gc->gc_active;
- spin_unlock(&gc->lock);
-}
-
-int pblk_gc_sysfs_force(struct pblk *pblk, int force)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- if (force < 0 || force > 1)
- return -EINVAL;
-
- spin_lock(&gc->lock);
- gc->gc_forced = force;
-
- if (force)
- gc->gc_enabled = 1;
- else
- gc->gc_enabled = 0;
- spin_unlock(&gc->lock);
-
- pblk_gc_should_start(pblk);
-
- return 0;
-}
-
-int pblk_gc_init(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
- if (IS_ERR(gc->gc_ts)) {
- pblk_err(pblk, "could not allocate GC main kthread\n");
- return PTR_ERR(gc->gc_ts);
- }
-
- gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
- "pblk-gc-writer-ts");
- if (IS_ERR(gc->gc_writer_ts)) {
- pblk_err(pblk, "could not allocate GC writer kthread\n");
- ret = PTR_ERR(gc->gc_writer_ts);
- goto fail_free_main_kthread;
- }
-
- gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
- "pblk-gc-reader-ts");
- if (IS_ERR(gc->gc_reader_ts)) {
- pblk_err(pblk, "could not allocate GC reader kthread\n");
- ret = PTR_ERR(gc->gc_reader_ts);
- goto fail_free_writer_kthread;
- }
-
- timer_setup(&gc->gc_timer, pblk_gc_timer, 0);
- mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
-
- gc->gc_active = 0;
- gc->gc_forced = 0;
- gc->gc_enabled = 1;
- gc->w_entries = 0;
- atomic_set(&gc->read_inflight_gc, 0);
- atomic_set(&gc->pipeline_gc, 0);
-
- /* Workqueue that reads valid sectors from a line and submit them to the
- * GC writer to be recycled.
- */
- gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
- if (!gc->gc_line_reader_wq) {
- pblk_err(pblk, "could not allocate GC line reader workqueue\n");
- ret = -ENOMEM;
- goto fail_free_reader_kthread;
- }
-
- /* Workqueue that prepare lines for GC */
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!gc->gc_reader_wq) {
- pblk_err(pblk, "could not allocate GC reader workqueue\n");
- ret = -ENOMEM;
- goto fail_free_reader_line_wq;
- }
-
- spin_lock_init(&gc->lock);
- spin_lock_init(&gc->w_lock);
- spin_lock_init(&gc->r_lock);
-
- sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);
-
- INIT_LIST_HEAD(&gc->w_list);
- INIT_LIST_HEAD(&gc->r_list);
-
- return 0;
-
-fail_free_reader_line_wq:
- destroy_workqueue(gc->gc_line_reader_wq);
-fail_free_reader_kthread:
- kthread_stop(gc->gc_reader_ts);
-fail_free_writer_kthread:
- kthread_stop(gc->gc_writer_ts);
-fail_free_main_kthread:
- kthread_stop(gc->gc_ts);
-
- return ret;
-}
-
-void pblk_gc_exit(struct pblk *pblk, bool graceful)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- gc->gc_enabled = 0;
- del_timer_sync(&gc->gc_timer);
- gc->gc_active = 0;
-
- if (gc->gc_ts)
- kthread_stop(gc->gc_ts);
-
- if (gc->gc_reader_ts)
- kthread_stop(gc->gc_reader_ts);
-
- if (graceful) {
- flush_workqueue(gc->gc_reader_wq);
- flush_workqueue(gc->gc_line_reader_wq);
- }
-
- destroy_workqueue(gc->gc_reader_wq);
- destroy_workqueue(gc->gc_line_reader_wq);
-
- if (gc->gc_writer_ts)
- kthread_stop(gc->gc_writer_ts);
-}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
deleted file mode 100644
index 5924f09c217b..000000000000
--- a/drivers/lightnvm/pblk-init.c
+++ /dev/null
@@ -1,1324 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a physical block-device target for Open-channel SSDs.
- *
- * pblk-init.c - pblk's initialization.
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-static unsigned int write_buffer_size;
-
-module_param(write_buffer_size, uint, 0644);
-MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
-
-struct pblk_global_caches {
- struct kmem_cache *ws;
- struct kmem_cache *rec;
- struct kmem_cache *g_rq;
- struct kmem_cache *w_rq;
-
- struct kref kref;
-
- struct mutex mutex; /* Ensures consistency between
- * caches and kref
- */
-};
-
-static struct pblk_global_caches pblk_caches = {
- .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex),
- .kref = KREF_INIT(0),
-};
-
-struct bio_set pblk_bio_set;
-
-static blk_qc_t pblk_submit_bio(struct bio *bio)
-{
- struct pblk *pblk = bio->bi_bdev->bd_disk->queue->queuedata;
-
- if (bio_op(bio) == REQ_OP_DISCARD) {
- pblk_discard(pblk, bio);
- if (!(bio->bi_opf & REQ_PREFLUSH)) {
- bio_endio(bio);
- return BLK_QC_T_NONE;
- }
- }
-
- /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
- * constraint. Writes can be of arbitrary size.
- */
- if (bio_data_dir(bio) == READ) {
- blk_queue_split(&bio);
- pblk_submit_read(pblk, bio);
- } else {
- /* Prevent deadlock in the case of a modest LUN configuration
- * and large user I/Os. Unless stalled, the rate limiter
- * leaves at least 256KB available for user I/O.
- */
- if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
- blk_queue_split(&bio);
-
- pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
- }
-
- return BLK_QC_T_NONE;
-}
-
-static const struct block_device_operations pblk_bops = {
- .owner = THIS_MODULE,
- .submit_bio = pblk_submit_bio,
-};
-
-
-static size_t pblk_trans_map_size(struct pblk *pblk)
-{
- int entry_size = 8;
-
- if (pblk->addrf_len < 32)
- entry_size = 4;
-
- return entry_size * pblk->capacity;
-}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static u32 pblk_l2p_crc(struct pblk *pblk)
-{
- size_t map_size;
- u32 crc = ~(u32)0;
-
- map_size = pblk_trans_map_size(pblk);
- crc = crc32_le(crc, pblk->trans_map, map_size);
- return crc;
-}
-#endif
-
-static void pblk_l2p_free(struct pblk *pblk)
-{
- vfree(pblk->trans_map);
-}
-
-static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
-{
- struct pblk_line *line = NULL;
-
- if (factory_init) {
- guid_gen(&pblk->instance_uuid);
- } else {
- line = pblk_recov_l2p(pblk);
- if (IS_ERR(line)) {
- pblk_err(pblk, "could not recover l2p table\n");
- return -EFAULT;
- }
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_info(pblk, "init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
-#endif
-
- /* Free full lines directly as GC has not been started yet */
- pblk_gc_free_full_lines(pblk);
-
- if (!line) {
- /* Configure next line for user data */
- line = pblk_line_get_first_data(pblk);
- if (!line)
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
-{
- sector_t i;
- struct ppa_addr ppa;
- size_t map_size;
- int ret = 0;
-
- map_size = pblk_trans_map_size(pblk);
- pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN |
- __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM);
- if (!pblk->trans_map) {
- pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
- map_size);
- return -ENOMEM;
- }
-
- pblk_ppa_set_empty(&ppa);
-
- for (i = 0; i < pblk->capacity; i++)
- pblk_trans_map_set(pblk, i, ppa);
-
- ret = pblk_l2p_recover(pblk, factory_init);
- if (ret)
- vfree(pblk->trans_map);
-
- return ret;
-}
-
-static void pblk_rwb_free(struct pblk *pblk)
-{
- if (pblk_rb_tear_down_check(&pblk->rwb))
- pblk_err(pblk, "write buffer error on tear down\n");
-
- pblk_rb_free(&pblk->rwb);
-}
-
-static int pblk_rwb_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- unsigned long buffer_size;
- int pgs_in_buffer, threshold;
-
- threshold = geo->mw_cunits * geo->all_luns;
- pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt)
- * geo->all_luns;
-
- if (write_buffer_size && (write_buffer_size > pgs_in_buffer))
- buffer_size = write_buffer_size;
- else
- buffer_size = pgs_in_buffer;
-
- return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs);
-}
-
-static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo,
- struct nvm_addrf_12 *dst)
-{
- struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf;
- int power_len;
-
- /* Re-calculate channel and lun format to adapt to configuration */
- power_len = get_count_order(geo->num_ch);
- if (1 << power_len != geo->num_ch) {
- pblk_err(pblk, "supports only power-of-two channel config.\n");
- return -EINVAL;
- }
- dst->ch_len = power_len;
-
- power_len = get_count_order(geo->num_lun);
- if (1 << power_len != geo->num_lun) {
- pblk_err(pblk, "supports only power-of-two LUN config.\n");
- return -EINVAL;
- }
- dst->lun_len = power_len;
-
- dst->blk_len = src->blk_len;
- dst->pg_len = src->pg_len;
- dst->pln_len = src->pln_len;
- dst->sec_len = src->sec_len;
-
- dst->sec_offset = 0;
- dst->pln_offset = dst->sec_len;
- dst->ch_offset = dst->pln_offset + dst->pln_len;
- dst->lun_offset = dst->ch_offset + dst->ch_len;
- dst->pg_offset = dst->lun_offset + dst->lun_len;
- dst->blk_offset = dst->pg_offset + dst->pg_len;
-
- dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
- dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
- dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
- dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
- dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
- dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
-
- return dst->blk_offset + src->blk_len;
-}
-
-static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst,
- struct pblk_addrf *udst)
-{
- struct nvm_addrf *src = &geo->addrf;
-
- adst->ch_len = get_count_order(geo->num_ch);
- adst->lun_len = get_count_order(geo->num_lun);
- adst->chk_len = src->chk_len;
- adst->sec_len = src->sec_len;
-
- adst->sec_offset = 0;
- adst->ch_offset = adst->sec_len;
- adst->lun_offset = adst->ch_offset + adst->ch_len;
- adst->chk_offset = adst->lun_offset + adst->lun_len;
-
- adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset;
- adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset;
- adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset;
- adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset;
-
- udst->sec_stripe = geo->ws_opt;
- udst->ch_stripe = geo->num_ch;
- udst->lun_stripe = geo->num_lun;
-
- udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe;
- udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe;
-
- return adst->chk_offset + adst->chk_len;
-}
-
-static int pblk_set_addrf(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int mod;
-
- switch (geo->version) {
- case NVM_OCSSD_SPEC_12:
- div_u64_rem(geo->clba, pblk->min_write_pgs, &mod);
- if (mod) {
- pblk_err(pblk, "bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
- pblk->addrf_len = pblk_set_addrf_12(pblk, geo,
- (void *)&pblk->addrf);
- break;
- case NVM_OCSSD_SPEC_20:
- pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf,
- &pblk->uaddrf);
- break;
- default:
- pblk_err(pblk, "OCSSD revision not supported (%d)\n",
- geo->version);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int pblk_create_global_caches(void)
-{
-
- pblk_caches.ws = kmem_cache_create("pblk_blk_ws",
- sizeof(struct pblk_line_ws), 0, 0, NULL);
- if (!pblk_caches.ws)
- return -ENOMEM;
-
- pblk_caches.rec = kmem_cache_create("pblk_rec",
- sizeof(struct pblk_rec_ctx), 0, 0, NULL);
- if (!pblk_caches.rec)
- goto fail_destroy_ws;
-
- pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
- 0, 0, NULL);
- if (!pblk_caches.g_rq)
- goto fail_destroy_rec;
-
- pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
- 0, 0, NULL);
- if (!pblk_caches.w_rq)
- goto fail_destroy_g_rq;
-
- return 0;
-
-fail_destroy_g_rq:
- kmem_cache_destroy(pblk_caches.g_rq);
-fail_destroy_rec:
- kmem_cache_destroy(pblk_caches.rec);
-fail_destroy_ws:
- kmem_cache_destroy(pblk_caches.ws);
-
- return -ENOMEM;
-}
-
-static int pblk_get_global_caches(void)
-{
- int ret = 0;
-
- mutex_lock(&pblk_caches.mutex);
-
- if (kref_get_unless_zero(&pblk_caches.kref))
- goto out;
-
- ret = pblk_create_global_caches();
- if (!ret)
- kref_init(&pblk_caches.kref);
-
-out:
- mutex_unlock(&pblk_caches.mutex);
- return ret;
-}
-
-static void pblk_destroy_global_caches(struct kref *ref)
-{
- struct pblk_global_caches *c;
-
- c = container_of(ref, struct pblk_global_caches, kref);
-
- kmem_cache_destroy(c->ws);
- kmem_cache_destroy(c->rec);
- kmem_cache_destroy(c->g_rq);
- kmem_cache_destroy(c->w_rq);
-}
-
-static void pblk_put_global_caches(void)
-{
- mutex_lock(&pblk_caches.mutex);
- kref_put(&pblk_caches.kref, pblk_destroy_global_caches);
- mutex_unlock(&pblk_caches.mutex);
-}
-
-static int pblk_core_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int ret, max_write_ppas;
-
- atomic64_set(&pblk->user_wa, 0);
- atomic64_set(&pblk->pad_wa, 0);
- atomic64_set(&pblk->gc_wa, 0);
- pblk->user_rst_wa = 0;
- pblk->pad_rst_wa = 0;
- pblk->gc_rst_wa = 0;
-
- atomic64_set(&pblk->nr_flush, 0);
- pblk->nr_flush_rst = 0;
-
- pblk->min_write_pgs = geo->ws_opt;
- pblk->min_write_pgs_data = pblk->min_write_pgs;
- max_write_ppas = pblk->min_write_pgs * geo->all_luns;
- pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
- pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
- queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT));
- pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
-
- pblk->oob_meta_size = geo->sos;
- if (!pblk_is_oob_meta_supported(pblk)) {
- /* For drives which does not have OOB metadata feature
- * in order to support recovery feature we need to use
- * so called packed metadata. Packed metada will store
- * the same information as OOB metadata (l2p table mapping,
- * but in the form of the single page at the end of
- * every write request.
- */
- if (pblk->min_write_pgs
- * sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
- /* We want to keep all the packed metadata on single
- * page per write requests. So we need to ensure that
- * it will fit.
- *
- * This is more like sanity check, since there is
- * no device with such a big minimal write size
- * (above 1 metabytes).
- */
- pblk_err(pblk, "Not supported min write size\n");
- return -EINVAL;
- }
- /* For packed meta approach we do some simplification.
- * On read path we always issue requests which size
- * equal to max_write_pgs, with all pages filled with
- * user payload except of last one page which will be
- * filled with packed metadata.
- */
- pblk->max_write_pgs = pblk->min_write_pgs;
- pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
- }
-
- pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
- GFP_KERNEL);
- if (!pblk->pad_dist)
- return -ENOMEM;
-
- if (pblk_get_global_caches())
- goto fail_free_pad_dist;
-
- /* Internal bios can be at most the sectors signaled by the device. */
- ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
- if (ret)
- goto free_global_caches;
-
- ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
- pblk_caches.ws);
- if (ret)
- goto free_page_bio_pool;
-
- ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
- pblk_caches.rec);
- if (ret)
- goto free_gen_ws_pool;
-
- ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
- pblk_caches.g_rq);
- if (ret)
- goto free_rec_pool;
-
- ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
- pblk_caches.g_rq);
- if (ret)
- goto free_r_rq_pool;
-
- ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
- pblk_caches.w_rq);
- if (ret)
- goto free_e_rq_pool;
-
- pblk->close_wq = alloc_workqueue("pblk-close-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
- if (!pblk->close_wq)
- goto free_w_rq_pool;
-
- pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (!pblk->bb_wq)
- goto free_close_wq;
-
- pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
- if (!pblk->r_end_wq)
- goto free_bb_wq;
-
- if (pblk_set_addrf(pblk))
- goto free_r_end_wq;
-
- INIT_LIST_HEAD(&pblk->compl_list);
- INIT_LIST_HEAD(&pblk->resubmit_list);
-
- return 0;
-
-free_r_end_wq:
- destroy_workqueue(pblk->r_end_wq);
-free_bb_wq:
- destroy_workqueue(pblk->bb_wq);
-free_close_wq:
- destroy_workqueue(pblk->close_wq);
-free_w_rq_pool:
- mempool_exit(&pblk->w_rq_pool);
-free_e_rq_pool:
- mempool_exit(&pblk->e_rq_pool);
-free_r_rq_pool:
- mempool_exit(&pblk->r_rq_pool);
-free_rec_pool:
- mempool_exit(&pblk->rec_pool);
-free_gen_ws_pool:
- mempool_exit(&pblk->gen_ws_pool);
-free_page_bio_pool:
- mempool_exit(&pblk->page_bio_pool);
-free_global_caches:
- pblk_put_global_caches();
-fail_free_pad_dist:
- kfree(pblk->pad_dist);
- return -ENOMEM;
-}
-
-static void pblk_core_free(struct pblk *pblk)
-{
- if (pblk->close_wq)
- destroy_workqueue(pblk->close_wq);
-
- if (pblk->r_end_wq)
- destroy_workqueue(pblk->r_end_wq);
-
- if (pblk->bb_wq)
- destroy_workqueue(pblk->bb_wq);
-
- mempool_exit(&pblk->page_bio_pool);
- mempool_exit(&pblk->gen_ws_pool);
- mempool_exit(&pblk->rec_pool);
- mempool_exit(&pblk->r_rq_pool);
- mempool_exit(&pblk->e_rq_pool);
- mempool_exit(&pblk->w_rq_pool);
-
- pblk_put_global_caches();
- kfree(pblk->pad_dist);
-}
-
-static void pblk_line_mg_free(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int i;
-
- kfree(l_mg->bb_template);
- kfree(l_mg->bb_aux);
- kfree(l_mg->vsc_list);
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- kfree(l_mg->sline_meta[i]);
- kvfree(l_mg->eline_meta[i]->buf);
- kfree(l_mg->eline_meta[i]);
- }
-
- mempool_destroy(l_mg->bitmap_pool);
- kmem_cache_destroy(l_mg->bitmap_cache);
-}
-
-static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
- struct pblk_line *line)
-{
- struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
-
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
- kfree(line->chks);
-
- kvfree(w_err_gc->lba_list);
- kfree(w_err_gc);
-}
-
-static void pblk_lines_free(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
- int i;
-
- for (i = 0; i < l_mg->nr_lines; i++) {
- line = &pblk->lines[i];
-
- pblk_line_free(line);
- pblk_line_meta_free(l_mg, line);
- }
-
- pblk_line_mg_free(pblk);
-
- kfree(pblk->luns);
- kfree(pblk->lines);
-}
-
-static int pblk_luns_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- int i;
-
- /* TODO: Implement unbalanced LUN support */
- if (geo->num_lun < 0) {
- pblk_err(pblk, "unbalanced LUN config.\n");
- return -EINVAL;
- }
-
- pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun),
- GFP_KERNEL);
- if (!pblk->luns)
- return -ENOMEM;
-
- for (i = 0; i < geo->all_luns; i++) {
- /* Stripe across channels */
- int ch = i % geo->num_ch;
- int lun_raw = i / geo->num_ch;
- int lunid = lun_raw + ch * geo->num_lun;
-
- rlun = &pblk->luns[i];
- rlun->bppa = dev->luns[lunid];
-
- sema_init(&rlun->wr_sem, 1);
- }
-
- return 0;
-}
-
-/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
-
- /* Round to sector size so that lba_list starts on its own sector */
- lm->emeta_sec[1] = DIV_ROUND_UP(
- sizeof(struct line_emeta) + lm->blk_bitmap_len +
- sizeof(struct wa_counters), geo->csecs);
- lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs;
-
- /* Round to sector size so that vsc_list starts on its own sector */
- lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
- lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
- geo->csecs);
- lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs;
-
- lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
- geo->csecs);
- lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs;
-
- lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
-
- return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
-}
-
-static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_geo *geo = &dev->geo;
- sector_t provisioned;
- int sec_meta, blk_meta, clba;
- int minimum;
-
- if (geo->op == NVM_TARGET_DEFAULT_OP)
- pblk->op = PBLK_DEFAULT_OP;
- else
- pblk->op = geo->op;
-
- minimum = pblk_get_min_chks(pblk);
- provisioned = nr_free_chks;
- provisioned *= (100 - pblk->op);
- sector_div(provisioned, 100);
-
- if ((nr_free_chks - provisioned) < minimum) {
- if (geo->op != NVM_TARGET_DEFAULT_OP) {
- pblk_err(pblk, "OP too small to create a sane instance\n");
- return -EINTR;
- }
-
- /* If the user did not specify an OP value, and PBLK_DEFAULT_OP
- * is not enough, calculate and set sane value
- */
-
- provisioned = nr_free_chks - minimum;
- pblk->op = (100 * minimum) / nr_free_chks;
- pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n",
- pblk->op);
- }
-
- pblk->op_blks = nr_free_chks - provisioned;
-
- /* Internally pblk manages all free blocks, but all calculations based
- * on user capacity consider only provisioned blocks
- */
- pblk->rl.total_blocks = nr_free_chks;
-
- /* Consider sectors used for metadata */
- sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
- blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
-
- clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
- pblk->capacity = (provisioned - blk_meta) * clba;
-
- atomic_set(&pblk->rl.free_blocks, nr_free_chks);
- atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);
-
- return 0;
-}
-
-static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
- struct nvm_chk_meta *meta)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- int i, nr_bad_chks = 0;
-
- for (i = 0; i < lm->blk_per_line; i++) {
- struct pblk_lun *rlun = &pblk->luns[i];
- struct nvm_chk_meta *chunk;
- struct nvm_chk_meta *chunk_meta;
- struct ppa_addr ppa;
- int pos;
-
- ppa = rlun->bppa;
- pos = pblk_ppa_to_pos(geo, ppa);
- chunk = &line->chks[pos];
-
- ppa.m.chk = line->id;
- chunk_meta = pblk_chunk_get_off(pblk, meta, ppa);
-
- chunk->state = chunk_meta->state;
- chunk->type = chunk_meta->type;
- chunk->wi = chunk_meta->wi;
- chunk->slba = chunk_meta->slba;
- chunk->cnlb = chunk_meta->cnlb;
- chunk->wp = chunk_meta->wp;
-
- trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa,
- chunk->state);
-
- if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
- WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
- continue;
- }
-
- if (!(chunk->state & NVM_CHK_ST_OFFLINE))
- continue;
-
- set_bit(pos, line->blk_bitmap);
- nr_bad_chks++;
- }
-
- return nr_bad_chks;
-}
-
-static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
- void *chunk_meta, int line_id)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- long nr_bad_chks, chk_in_line;
-
- line->pblk = pblk;
- line->id = line_id;
- line->type = PBLK_LINETYPE_FREE;
- line->state = PBLK_LINESTATE_NEW;
- line->gc_group = PBLK_LINEGC_NONE;
- line->vsc = &l_mg->vsc_list[line_id];
- spin_lock_init(&line->lock);
-
- nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta);
-
- chk_in_line = lm->blk_per_line - nr_bad_chks;
- if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
- chk_in_line < lm->min_blk_line) {
- line->state = PBLK_LINESTATE_BAD;
- list_add_tail(&line->list, &l_mg->bad_list);
- return 0;
- }
-
- atomic_set(&line->blk_in_line, chk_in_line);
- list_add_tail(&line->list, &l_mg->free_list);
- l_mg->nr_free_lines++;
-
- return chk_in_line;
-}
-
-static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
-
- line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->blk_bitmap)
- return -ENOMEM;
-
- line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
- if (!line->erase_bitmap)
- goto free_blk_bitmap;
-
-
- line->chks = kmalloc_array(lm->blk_per_line,
- sizeof(struct nvm_chk_meta), GFP_KERNEL);
- if (!line->chks)
- goto free_erase_bitmap;
-
- line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
- if (!line->w_err_gc)
- goto free_chks;
-
- return 0;
-
-free_chks:
- kfree(line->chks);
-free_erase_bitmap:
- kfree(line->erase_bitmap);
-free_blk_bitmap:
- kfree(line->blk_bitmap);
- return -ENOMEM;
-}
-
-static int pblk_line_mg_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- int i, bb_distance;
-
- l_mg->nr_lines = geo->num_chk;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-
- INIT_LIST_HEAD(&l_mg->free_list);
- INIT_LIST_HEAD(&l_mg->corrupt_list);
- INIT_LIST_HEAD(&l_mg->bad_list);
- INIT_LIST_HEAD(&l_mg->gc_full_list);
- INIT_LIST_HEAD(&l_mg->gc_high_list);
- INIT_LIST_HEAD(&l_mg->gc_mid_list);
- INIT_LIST_HEAD(&l_mg->gc_low_list);
- INIT_LIST_HEAD(&l_mg->gc_empty_list);
- INIT_LIST_HEAD(&l_mg->gc_werr_list);
-
- INIT_LIST_HEAD(&l_mg->emeta_list);
-
- l_mg->gc_lists[0] = &l_mg->gc_werr_list;
- l_mg->gc_lists[1] = &l_mg->gc_high_list;
- l_mg->gc_lists[2] = &l_mg->gc_mid_list;
- l_mg->gc_lists[3] = &l_mg->gc_low_list;
-
- spin_lock_init(&l_mg->free_lock);
- spin_lock_init(&l_mg->close_lock);
- spin_lock_init(&l_mg->gc_lock);
-
- l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
- if (!l_mg->vsc_list)
- goto fail;
-
- l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!l_mg->bb_template)
- goto fail_free_vsc_list;
-
- l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
- if (!l_mg->bb_aux)
- goto fail_free_bb_template;
-
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i])
- goto fail_free_smeta;
- }
-
- l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap",
- lm->sec_bitmap_len, 0, 0, NULL);
- if (!l_mg->bitmap_cache)
- goto fail_free_smeta;
-
- /* the bitmap pool is used for both valid and map bitmaps */
- l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2,
- l_mg->bitmap_cache);
- if (!l_mg->bitmap_pool)
- goto fail_destroy_bitmap_cache;
-
- /* emeta allocates three different buffers for managing metadata with
- * in-memory and in-media layouts
- */
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- struct pblk_emeta *emeta;
-
- emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
- if (!emeta)
- goto fail_free_emeta;
-
- emeta->buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL);
- if (!emeta->buf) {
- kfree(emeta);
- goto fail_free_emeta;
- }
-
- emeta->nr_entries = lm->emeta_sec[0];
- l_mg->eline_meta[i] = emeta;
- }
-
- for (i = 0; i < l_mg->nr_lines; i++)
- l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
-
- bb_distance = (geo->all_luns) * geo->ws_opt;
- for (i = 0; i < lm->sec_per_line; i += bb_distance)
- bitmap_set(l_mg->bb_template, i, geo->ws_opt);
-
- return 0;
-
-fail_free_emeta:
- while (--i >= 0) {
- kvfree(l_mg->eline_meta[i]->buf);
- kfree(l_mg->eline_meta[i]);
- }
-
- mempool_destroy(l_mg->bitmap_pool);
-fail_destroy_bitmap_cache:
- kmem_cache_destroy(l_mg->bitmap_cache);
-fail_free_smeta:
- for (i = 0; i < PBLK_DATA_LINES; i++)
- kfree(l_mg->sline_meta[i]);
- kfree(l_mg->bb_aux);
-fail_free_bb_template:
- kfree(l_mg->bb_template);
-fail_free_vsc_list:
- kfree(l_mg->vsc_list);
-fail:
- return -ENOMEM;
-}
-
-static int pblk_line_meta_init(struct pblk *pblk)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int smeta_len, emeta_len;
- int i;
-
- lm->sec_per_line = geo->clba * geo->all_luns;
- lm->blk_per_line = geo->all_luns;
- lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
- lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
- lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
- lm->mid_thrs = lm->sec_per_line / 2;
- lm->high_thrs = lm->sec_per_line / 4;
- lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs;
-
- /* Calculate necessary pages for smeta. See comment over struct
- * line_smeta definition
- */
- i = 1;
-add_smeta_page:
- lm->smeta_sec = i * geo->ws_opt;
- lm->smeta_len = lm->smeta_sec * geo->csecs;
-
- smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
- if (smeta_len > lm->smeta_len) {
- i++;
- goto add_smeta_page;
- }
-
- /* Calculate necessary pages for emeta. See comment over struct
- * line_emeta definition
- */
- i = 1;
-add_emeta_page:
- lm->emeta_sec[0] = i * geo->ws_opt;
- lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs;
-
- emeta_len = calc_emeta_len(pblk);
- if (emeta_len > lm->emeta_len[0]) {
- i++;
- goto add_emeta_page;
- }
-
- lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0;
-
- lm->min_blk_line = 1;
- if (geo->all_luns > 1)
- lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
- lm->emeta_sec[0], geo->clba);
-
- if (lm->min_blk_line > lm->blk_per_line) {
- pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n",
- lm->blk_per_line);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int pblk_lines_init(struct pblk *pblk)
-{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
- void *chunk_meta;
- int nr_free_chks = 0;
- int i, ret;
-
- ret = pblk_line_meta_init(pblk);
- if (ret)
- return ret;
-
- ret = pblk_line_mg_init(pblk);
- if (ret)
- return ret;
-
- ret = pblk_luns_init(pblk);
- if (ret)
- goto fail_free_meta;
-
- chunk_meta = pblk_get_chunk_meta(pblk);
- if (IS_ERR(chunk_meta)) {
- ret = PTR_ERR(chunk_meta);
- goto fail_free_luns;
- }
-
- pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
- GFP_KERNEL);
- if (!pblk->lines) {
- ret = -ENOMEM;
- goto fail_free_chunk_meta;
- }
-
- for (i = 0; i < l_mg->nr_lines; i++) {
- line = &pblk->lines[i];
-
- ret = pblk_alloc_line_meta(pblk, line);
- if (ret)
- goto fail_free_lines;
-
- nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
-
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- }
-
- if (!nr_free_chks) {
- pblk_err(pblk, "too many bad blocks prevent for sane instance\n");
- ret = -EINTR;
- goto fail_free_lines;
- }
-
- ret = pblk_set_provision(pblk, nr_free_chks);
- if (ret)
- goto fail_free_lines;
-
- vfree(chunk_meta);
- return 0;
-
-fail_free_lines:
- while (--i >= 0)
- pblk_line_meta_free(l_mg, &pblk->lines[i]);
- kfree(pblk->lines);
-fail_free_chunk_meta:
- vfree(chunk_meta);
-fail_free_luns:
- kfree(pblk->luns);
-fail_free_meta:
- pblk_line_mg_free(pblk);
-
- return ret;
-}
-
-static int pblk_writer_init(struct pblk *pblk)
-{
- pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
- if (IS_ERR(pblk->writer_ts)) {
- int err = PTR_ERR(pblk->writer_ts);
-
- if (err != -EINTR)
- pblk_err(pblk, "could not allocate writer kthread (%d)\n",
- err);
- return err;
- }
-
- timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
- mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
-
- return 0;
-}
-
-static void pblk_writer_stop(struct pblk *pblk)
-{
- /* The pipeline must be stopped and the write buffer emptied before the
- * write thread is stopped
- */
- WARN(pblk_rb_read_count(&pblk->rwb),
- "Stopping not fully persisted write buffer\n");
-
- WARN(pblk_rb_sync_count(&pblk->rwb),
- "Stopping not fully synced write buffer\n");
-
- del_timer_sync(&pblk->wtimer);
- if (pblk->writer_ts)
- kthread_stop(pblk->writer_ts);
-}
-
-static void pblk_free(struct pblk *pblk)
-{
- pblk_lines_free(pblk);
- pblk_l2p_free(pblk);
- pblk_rwb_free(pblk);
- pblk_core_free(pblk);
-
- kfree(pblk);
-}
-
-static void pblk_tear_down(struct pblk *pblk, bool graceful)
-{
- if (graceful)
- __pblk_pipeline_flush(pblk);
- __pblk_pipeline_stop(pblk);
- pblk_writer_stop(pblk);
- pblk_rb_sync_l2p(&pblk->rwb);
- pblk_rl_free(&pblk->rl);
-
- pblk_debug(pblk, "consistent tear down (graceful:%d)\n", graceful);
-}
-
-static void pblk_exit(void *private, bool graceful)
-{
- struct pblk *pblk = private;
-
- pblk_gc_exit(pblk, graceful);
- pblk_tear_down(pblk, graceful);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- pblk_info(pblk, "exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
-#endif
-
- pblk_free(pblk);
-}
-
-static sector_t pblk_capacity(void *private)
-{
- struct pblk *pblk = private;
-
- return pblk->capacity * NR_PHY_IN_LOG;
-}
-
-static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
- int flags)
-{
- struct nvm_geo *geo = &dev->geo;
- struct request_queue *bqueue = dev->q;
- struct request_queue *tqueue = tdisk->queue;
- struct pblk *pblk;
- int ret;
-
- pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
- if (!pblk)
- return ERR_PTR(-ENOMEM);
-
- pblk->dev = dev;
- pblk->disk = tdisk;
- pblk->state = PBLK_STATE_RUNNING;
- trace_pblk_state(pblk_disk_name(pblk), pblk->state);
- pblk->gc.gc_enabled = 0;
-
- if (!(geo->version == NVM_OCSSD_SPEC_12 ||
- geo->version == NVM_OCSSD_SPEC_20)) {
- pblk_err(pblk, "OCSSD version not supported (%u)\n",
- geo->version);
- kfree(pblk);
- return ERR_PTR(-EINVAL);
- }
-
- if (geo->ext) {
- pblk_err(pblk, "extended metadata not supported\n");
- kfree(pblk);
- return ERR_PTR(-EINVAL);
- }
-
- spin_lock_init(&pblk->resubmit_lock);
- spin_lock_init(&pblk->trans_lock);
- spin_lock_init(&pblk->lock);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_set(&pblk->inflight_writes, 0);
- atomic_long_set(&pblk->padded_writes, 0);
- atomic_long_set(&pblk->padded_wb, 0);
- atomic_long_set(&pblk->req_writes, 0);
- atomic_long_set(&pblk->sub_writes, 0);
- atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->inflight_reads, 0);
- atomic_long_set(&pblk->cache_reads, 0);
- atomic_long_set(&pblk->sync_reads, 0);
- atomic_long_set(&pblk->recov_writes, 0);
- atomic_long_set(&pblk->recov_writes, 0);
- atomic_long_set(&pblk->recov_gc_writes, 0);
- atomic_long_set(&pblk->recov_gc_reads, 0);
-#endif
-
- atomic_long_set(&pblk->read_failed, 0);
- atomic_long_set(&pblk->read_empty, 0);
- atomic_long_set(&pblk->read_high_ecc, 0);
- atomic_long_set(&pblk->read_failed_gc, 0);
- atomic_long_set(&pblk->write_failed, 0);
- atomic_long_set(&pblk->erase_failed, 0);
-
- ret = pblk_core_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize core\n");
- goto fail;
- }
-
- ret = pblk_lines_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize lines\n");
- goto fail_free_core;
- }
-
- ret = pblk_rwb_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize write buffer\n");
- goto fail_free_lines;
- }
-
- ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
- if (ret) {
- pblk_err(pblk, "could not initialize maps\n");
- goto fail_free_rwb;
- }
-
- ret = pblk_writer_init(pblk);
- if (ret) {
- if (ret != -EINTR)
- pblk_err(pblk, "could not initialize write thread\n");
- goto fail_free_l2p;
- }
-
- ret = pblk_gc_init(pblk);
- if (ret) {
- pblk_err(pblk, "could not initialize gc\n");
- goto fail_stop_writer;
- }
-
- /* inherit the size from the underlying device */
- blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
- blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
-
- blk_queue_write_cache(tqueue, true, false);
-
- tqueue->limits.discard_granularity = geo->clba * geo->csecs;
- tqueue->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);
-
- pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
- geo->all_luns, pblk->l_mg.nr_lines,
- (unsigned long long)pblk->capacity,
- pblk->rwb.nr_entries);
-
- wake_up_process(pblk->writer_ts);
-
- /* Check if we need to start GC */
- pblk_gc_should_kick(pblk);
-
- return pblk;
-
-fail_stop_writer:
- pblk_writer_stop(pblk);
-fail_free_l2p:
- pblk_l2p_free(pblk);
-fail_free_rwb:
- pblk_rwb_free(pblk);
-fail_free_lines:
- pblk_lines_free(pblk);
-fail_free_core:
- pblk_core_free(pblk);
-fail:
- kfree(pblk);
- return ERR_PTR(ret);
-}
-
-/* physical block device target */
-static struct nvm_tgt_type tt_pblk = {
- .name = "pblk",
- .version = {1, 0, 0},
-
- .bops = &pblk_bops,
- .capacity = pblk_capacity,
-
- .init = pblk_init,
- .exit = pblk_exit,
-
- .sysfs_init = pblk_sysfs_init,
- .sysfs_exit = pblk_sysfs_exit,
- .owner = THIS_MODULE,
-};
-
-static int __init pblk_module_init(void)
-{
- int ret;
-
- ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
- if (ret)
- return ret;
- ret = nvm_register_tgt_type(&tt_pblk);
- if (ret)
- bioset_exit(&pblk_bio_set);
- return ret;
-}
-
-static void pblk_module_exit(void)
-{
- bioset_exit(&pblk_bio_set);
- nvm_unregister_tgt_type(&tt_pblk);
-}
-
-module_init(pblk_module_init);
-module_exit(pblk_module_exit);
-MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
-MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
deleted file mode 100644
index 5408e32b2f13..000000000000
--- a/drivers/lightnvm/pblk-map.c
+++ /dev/null
@@ -1,210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-map.c - pblk's lba-ppa mapping strategy
- *
- */
-
-#include "pblk.h"
-
-static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
- struct ppa_addr *ppa_list,
- unsigned long *lun_bitmap,
- void *meta_list,
- unsigned int valid_secs)
-{
- struct pblk_line *line = pblk_line_get_data(pblk);
- struct pblk_emeta *emeta;
- struct pblk_w_ctx *w_ctx;
- __le64 *lba_list;
- u64 paddr;
- int nr_secs = pblk->min_write_pgs;
- int i;
-
- if (!line)
- return -ENOSPC;
-
- if (pblk_line_is_full(line)) {
- struct pblk_line *prev_line = line;
-
- /* If we cannot allocate a new line, make sure to store metadata
- * on current line and then fail
- */
- line = pblk_line_replace_data(pblk);
- pblk_line_close_meta(pblk, prev_line);
-
- if (!line) {
- pblk_pipeline_stop(pblk);
- return -ENOSPC;
- }
-
- }
-
- emeta = line->emeta;
- lba_list = emeta_to_lbas(pblk, emeta->buf);
-
- paddr = pblk_alloc_page(pblk, line, nr_secs);
-
- for (i = 0; i < nr_secs; i++, paddr++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- /* ppa to be sent to the device */
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-
- /* Write context for target bio completion on write buffer. Note
- * that the write buffer is protected by the sync backpointer,
- * and a single writer thread have access to each specific entry
- * at a time. Thus, it is safe to modify the context for the
- * entry we are setting up for submission without taking any
- * lock or memory barrier.
- */
- if (i < valid_secs) {
- kref_get(&line->ref);
- atomic_inc(&line->sec_to_update);
- w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
- w_ctx->ppa = ppa_list[i];
- meta->lba = cpu_to_le64(w_ctx->lba);
- lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- if (lba_list[paddr] != addr_empty)
- line->nr_valid_lbas++;
- else
- atomic64_inc(&pblk->pad_wa);
- } else {
- lba_list[paddr] = addr_empty;
- meta->lba = addr_empty;
- __pblk_map_invalidate(pblk, line, paddr);
- }
- }
-
- pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
- return 0;
-}
-
-int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
- unsigned long *lun_bitmap, unsigned int valid_secs,
- unsigned int off)
-{
- void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
- void *meta_buffer;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- unsigned int map_secs;
- int min = pblk->min_write_pgs;
- int i;
- int ret;
-
- for (i = off; i < rqd->nr_ppas; i += min) {
- map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- meta_buffer = pblk_get_meta(pblk, meta_list, i);
-
- ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
- lun_bitmap, meta_buffer, map_secs);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
-/* only if erase_ppa is set, acquire erase semaphore */
-int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int sentry, unsigned long *lun_bitmap,
- unsigned int valid_secs, struct ppa_addr *erase_ppa)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
- void *meta_buffer;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- struct pblk_line *e_line, *d_line;
- unsigned int map_secs;
- int min = pblk->min_write_pgs;
- int i, erase_lun;
- int ret;
-
-
- for (i = 0; i < rqd->nr_ppas; i += min) {
- map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
- meta_buffer = pblk_get_meta(pblk, meta_list, i);
-
- ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
- lun_bitmap, meta_buffer, map_secs);
- if (ret)
- return ret;
-
- erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
-
- /* line can change after page map. We might also be writing the
- * last line.
- */
- e_line = pblk_line_get_erase(pblk);
- if (!e_line)
- return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
- valid_secs, i + min);
-
- spin_lock(&e_line->lock);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- set_bit(erase_lun, e_line->erase_bitmap);
- atomic_dec(&e_line->left_eblks);
-
- *erase_ppa = ppa_list[i];
- erase_ppa->a.blk = e_line->id;
- erase_ppa->a.reserved = 0;
-
- spin_unlock(&e_line->lock);
-
- /* Avoid evaluating e_line->left_eblks */
- return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
- valid_secs, i + min);
- }
- spin_unlock(&e_line->lock);
- }
-
- d_line = pblk_line_get_data(pblk);
-
- /* line can change after page map. We might also be writing the
- * last line.
- */
- e_line = pblk_line_get_erase(pblk);
- if (!e_line)
- return -ENOSPC;
-
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
- bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
- int bit = -1;
-
-retry:
- bit = find_next_bit(d_line->blk_bitmap,
- lm->blk_per_line, bit + 1);
- if (bit >= lm->blk_per_line)
- return 0;
-
- spin_lock(&e_line->lock);
- if (test_bit(bit, e_line->erase_bitmap)) {
- spin_unlock(&e_line->lock);
- goto retry;
- }
- spin_unlock(&e_line->lock);
-
- set_bit(bit, e_line->erase_bitmap);
- atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
- erase_ppa->a.blk = e_line->id;
- }
-
- return 0;
-}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
deleted file mode 100644
index 5abb1705b039..000000000000
--- a/drivers/lightnvm/pblk-rb.c
+++ /dev/null
@@ -1,858 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- *
- * Based upon the circular ringbuffer.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-rb.c - pblk's write buffer
- */
-
-#include <linux/circ_buf.h>
-
-#include "pblk.h"
-
-static DECLARE_RWSEM(pblk_rb_lock);
-
-static void pblk_rb_data_free(struct pblk_rb *rb)
-{
- struct pblk_rb_pages *p, *t;
-
- down_write(&pblk_rb_lock);
- list_for_each_entry_safe(p, t, &rb->pages, list) {
- free_pages((unsigned long)page_address(p->pages), p->order);
- list_del(&p->list);
- kfree(p);
- }
- up_write(&pblk_rb_lock);
-}
-
-void pblk_rb_free(struct pblk_rb *rb)
-{
- pblk_rb_data_free(rb);
- vfree(rb->entries);
-}
-
-/*
- * pblk_rb_calculate_size -- calculate the size of the write buffer
- */
-static unsigned int pblk_rb_calculate_size(unsigned int nr_entries,
- unsigned int threshold)
-{
- unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA));
- unsigned int max_sz = max(thr_sz, nr_entries);
- unsigned int max_io;
-
- /* Alloc a write buffer that can (i) fit at least two split bios
- * (considering max I/O size NVM_MAX_VLBA, and (ii) guarantee that the
- * threshold will be respected
- */
- max_io = (1 << max((int)(get_count_order(max_sz)),
- (int)(get_count_order(NVM_MAX_VLBA << 1))));
- if ((threshold + NVM_MAX_VLBA) >= max_io)
- max_io <<= 1;
-
- return max_io;
-}
-
-/*
- * Initialize ring buffer. The data and metadata buffers must be previously
- * allocated and their size must be a power of two
- * (Documentation/core-api/circular-buffers.rst)
- */
-int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
- unsigned int seg_size)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entries;
- unsigned int init_entry = 0;
- unsigned int max_order = MAX_ORDER - 1;
- unsigned int power_size, power_seg_sz;
- unsigned int alloc_order, order, iter;
- unsigned int nr_entries;
-
- nr_entries = pblk_rb_calculate_size(size, threshold);
- entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
- if (!entries)
- return -ENOMEM;
-
- power_size = get_count_order(nr_entries);
- power_seg_sz = get_count_order(seg_size);
-
- down_write(&pblk_rb_lock);
- rb->entries = entries;
- rb->seg_size = (1 << power_seg_sz);
- rb->nr_entries = (1 << power_size);
- rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
- rb->back_thres = threshold;
- rb->flush_point = EMPTY_ENTRY;
-
- spin_lock_init(&rb->w_lock);
- spin_lock_init(&rb->s_lock);
-
- INIT_LIST_HEAD(&rb->pages);
-
- alloc_order = power_size;
- if (alloc_order >= max_order) {
- order = max_order;
- iter = (1 << (alloc_order - max_order));
- } else {
- order = alloc_order;
- iter = 1;
- }
-
- do {
- struct pblk_rb_entry *entry;
- struct pblk_rb_pages *page_set;
- void *kaddr;
- unsigned long set_size;
- int i;
-
- page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
- if (!page_set) {
- up_write(&pblk_rb_lock);
- vfree(entries);
- return -ENOMEM;
- }
-
- page_set->order = order;
- page_set->pages = alloc_pages(GFP_KERNEL, order);
- if (!page_set->pages) {
- kfree(page_set);
- pblk_rb_data_free(rb);
- up_write(&pblk_rb_lock);
- vfree(entries);
- return -ENOMEM;
- }
- kaddr = page_address(page_set->pages);
-
- entry = &rb->entries[init_entry];
- entry->data = kaddr;
- entry->cacheline = pblk_cacheline_to_addr(init_entry++);
- entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
-
- set_size = (1 << order);
- for (i = 1; i < set_size; i++) {
- entry = &rb->entries[init_entry];
- entry->cacheline = pblk_cacheline_to_addr(init_entry++);
- entry->data = kaddr + (i * rb->seg_size);
- entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
- bio_list_init(&entry->w_ctx.bios);
- }
-
- list_add_tail(&page_set->list, &rb->pages);
- iter--;
- } while (iter > 0);
- up_write(&pblk_rb_lock);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_set(&rb->inflight_flush_point, 0);
-#endif
-
- /*
- * Initialize rate-limiter, which controls access to the write buffer
- * by user and GC I/O
- */
- pblk_rl_init(&pblk->rl, rb->nr_entries, threshold);
-
- return 0;
-}
-
-static void clean_wctx(struct pblk_w_ctx *w_ctx)
-{
- int flags;
-
- flags = READ_ONCE(w_ctx->flags);
- WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
- "pblk: overwriting unsubmitted data\n");
-
- /* Release flags on context. Protect from writes and reads */
- smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
- pblk_ppa_set_empty(&w_ctx->ppa);
- w_ctx->lba = ADDR_EMPTY;
-}
-
-#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
-#define pblk_rb_ring_space(rb, head, tail, size) \
- (CIRC_SPACE(head, tail, size))
-
-/*
- * Buffer space is calculated with respect to the back pointer signaling
- * synchronized entries to the media.
- */
-static unsigned int pblk_rb_space(struct pblk_rb *rb)
-{
- unsigned int mem = READ_ONCE(rb->mem);
- unsigned int sync = READ_ONCE(rb->sync);
-
- return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
-}
-
-unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
- unsigned int nr_entries)
-{
- return (p + nr_entries) & (rb->nr_entries - 1);
-}
-
-/*
- * Buffer count is calculated with respect to the submission entry signaling the
- * entries that are available to send to the media
- */
-unsigned int pblk_rb_read_count(struct pblk_rb *rb)
-{
- unsigned int mem = READ_ONCE(rb->mem);
- unsigned int subm = READ_ONCE(rb->subm);
-
- return pblk_rb_ring_count(mem, subm, rb->nr_entries);
-}
-
-unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
-{
- unsigned int mem = READ_ONCE(rb->mem);
- unsigned int sync = READ_ONCE(rb->sync);
-
- return pblk_rb_ring_count(mem, sync, rb->nr_entries);
-}
-
-unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
-{
- unsigned int subm;
-
- subm = READ_ONCE(rb->subm);
- /* Commit read means updating submission pointer */
- smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));
-
- return subm;
-}
-
-static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_line *line;
- struct pblk_rb_entry *entry;
- struct pblk_w_ctx *w_ctx;
- unsigned int user_io = 0, gc_io = 0;
- unsigned int i;
- int flags;
-
- for (i = 0; i < to_update; i++) {
- entry = &rb->entries[rb->l2p_update];
- w_ctx = &entry->w_ctx;
-
- flags = READ_ONCE(entry->w_ctx.flags);
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
-
- pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
- entry->cacheline);
-
- line = pblk_ppa_to_line(pblk, w_ctx->ppa);
- atomic_dec(&line->sec_to_update);
- kref_put(&line->ref, pblk_line_put);
- clean_wctx(w_ctx);
- rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
- }
-
- pblk_rl_out(&pblk->rl, user_io, gc_io);
-
- return 0;
-}
-
-/*
- * When we move the l2p_update pointer, we update the l2p table - lookups will
- * point to the physical address instead of to the cacheline in the write buffer
- * from this moment on.
- */
-static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int mem, unsigned int sync)
-{
- unsigned int space, count;
- int ret = 0;
-
- lockdep_assert_held(&rb->w_lock);
-
- /* Update l2p only as buffer entries are being overwritten */
- space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
- if (space > nr_entries)
- goto out;
-
- count = nr_entries - space;
- /* l2p_update used exclusively under rb->w_lock */
- ret = __pblk_rb_update_l2p(rb, count);
-
-out:
- return ret;
-}
-
-/*
- * Update the l2p entry for all sectors stored on the write buffer. This means
- * that all future lookups to the l2p table will point to a device address, not
- * to the cacheline in the write buffer.
- */
-void pblk_rb_sync_l2p(struct pblk_rb *rb)
-{
- unsigned int sync;
- unsigned int to_update;
-
- spin_lock(&rb->w_lock);
-
- /* Protect from reads and writes */
- sync = smp_load_acquire(&rb->sync);
-
- to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
- __pblk_rb_update_l2p(rb, to_update);
-
- spin_unlock(&rb->w_lock);
-}
-
-/*
- * Write @nr_entries to ring buffer from @data buffer if there is enough space.
- * Typically, 4KB data chunks coming from a bio will be copied to the ring
- * buffer, thus the write will fail if not all incoming data can be copied.
- *
- */
-static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx,
- struct pblk_rb_entry *entry)
-{
- memcpy(entry->data, data, rb->seg_size);
-
- entry->w_ctx.lba = w_ctx.lba;
- entry->w_ctx.ppa = w_ctx.ppa;
-}
-
-void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, unsigned int ring_pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entry;
- int flags;
-
- entry = &rb->entries[ring_pos];
- flags = READ_ONCE(entry->w_ctx.flags);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Caller must guarantee that the entry is free */
- BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
-#endif
-
- __pblk_rb_write_entry(rb, data, w_ctx, entry);
-
- pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
- flags = w_ctx.flags | PBLK_WRITTEN_DATA;
-
- /* Release flags on write context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
-}
-
-void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, struct pblk_line *line,
- u64 paddr, unsigned int ring_pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entry;
- int flags;
-
- entry = &rb->entries[ring_pos];
- flags = READ_ONCE(entry->w_ctx.flags);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Caller must guarantee that the entry is free */
- BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
-#endif
-
- __pblk_rb_write_entry(rb, data, w_ctx, entry);
-
- if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
- entry->w_ctx.lba = ADDR_EMPTY;
-
- flags = w_ctx.flags | PBLK_WRITTEN_DATA;
-
- /* Release flags on write context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
-}
-
-static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
- unsigned int pos)
-{
- struct pblk_rb_entry *entry;
- unsigned int sync, flush_point;
-
- pblk_rb_sync_init(rb, NULL);
- sync = READ_ONCE(rb->sync);
-
- if (pos == sync) {
- pblk_rb_sync_end(rb, NULL);
- return 0;
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_inc(&rb->inflight_flush_point);
-#endif
-
- flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
- entry = &rb->entries[flush_point];
-
- /* Protect flush points */
- smp_store_release(&rb->flush_point, flush_point);
-
- if (bio)
- bio_list_add(&entry->w_ctx.bios, bio);
-
- pblk_rb_sync_end(rb, NULL);
-
- return bio ? 1 : 0;
-}
-
-static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos)
-{
- unsigned int mem;
- unsigned int sync;
- unsigned int threshold;
-
- sync = READ_ONCE(rb->sync);
- mem = READ_ONCE(rb->mem);
-
- threshold = nr_entries + rb->back_thres;
-
- if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
- return 0;
-
- if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
- return 0;
-
- *pos = mem;
-
- return 1;
-}
-
-static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos)
-{
- if (!__pblk_rb_may_write(rb, nr_entries, pos))
- return 0;
-
- /* Protect from read count */
- smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
- return 1;
-}
-
-void pblk_rb_flush(struct pblk_rb *rb)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- unsigned int mem = READ_ONCE(rb->mem);
-
- if (pblk_rb_flush_point_set(rb, NULL, mem))
- return;
-
- pblk_write_kick(pblk);
-}
-
-static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos, struct bio *bio,
- int *io_ret)
-{
- unsigned int mem;
-
- if (!__pblk_rb_may_write(rb, nr_entries, pos))
- return 0;
-
- mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
- *io_ret = NVM_IO_DONE;
-
- if (bio->bi_opf & REQ_PREFLUSH) {
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
-
- atomic64_inc(&pblk->nr_flush);
- if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
- *io_ret = NVM_IO_OK;
- }
-
- /* Protect from read count */
- smp_store_release(&rb->mem, mem);
-
- return 1;
-}
-
-/*
- * Atomically check that (i) there is space on the write buffer for the
- * incoming I/O, and (ii) the current I/O type has enough budget in the write
- * buffer (rate-limiter).
- */
-int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
- unsigned int nr_entries, unsigned int *pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int io_ret;
-
- spin_lock(&rb->w_lock);
- io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
- if (io_ret) {
- spin_unlock(&rb->w_lock);
- return io_ret;
- }
-
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
- spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
- }
-
- pblk_rl_user_in(&pblk->rl, nr_entries);
- spin_unlock(&rb->w_lock);
-
- return io_ret;
-}
-
-/*
- * Look at pblk_rb_may_write_user comment
- */
-int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
-
- spin_lock(&rb->w_lock);
- if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
- spin_unlock(&rb->w_lock);
- return 0;
- }
-
- if (!pblk_rb_may_write(rb, nr_entries, pos)) {
- spin_unlock(&rb->w_lock);
- return 0;
- }
-
- pblk_rl_gc_in(&pblk->rl, nr_entries);
- spin_unlock(&rb->w_lock);
-
- return 1;
-}
-
-/*
- * Read available entries on rb and add them to the given bio. To avoid a memory
- * copy, a reference to the write buffer page is added to the bio instead.
- *
- * This function is used by the write thread to form the write bio that will
- * persist data on the write buffer to the media.
- *
- * Starting at pos, nr_entries sectors are placed in rqd->bio. When count is
- * smaller than nr_entries, only count entries come from the buffer and the
- * difference is padded through pblk_bio_add_pages(). The request's pblk_c_ctx
- * is filled with the start position and the valid/padded counts.
- *
- * Returns NVM_IO_OK on success, or NVM_IO_ERR if a page could not be added to
- * the bio.
- */
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
- unsigned int pos, unsigned int nr_entries,
- unsigned int count)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct request_queue *q = pblk->dev->q;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio = rqd->bio;
- struct pblk_rb_entry *entry;
- struct page *page;
- unsigned int pad = 0, to_read = nr_entries;
- unsigned int i;
- int flags;
-
- /* Fewer entries available than requested: the remainder is padding */
- if (count < nr_entries) {
- pad = nr_entries - count;
- to_read = count;
- }
-
- /* Add space for packed metadata if in use*/
- pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
-
- c_ctx->sentry = pos;
- c_ctx->nr_valid = to_read;
- c_ctx->nr_padded = pad;
-
- for (i = 0; i < to_read; i++) {
- entry = &rb->entries[pos];
-
- /* A write has been allowed into the buffer, but data is still
- * being copied to it. It is ok to busy wait.
- */
-try:
- flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA)) {
- io_schedule();
- goto try;
- }
-
- page = virt_to_page(entry->data);
- if (!page) {
- pblk_err(pblk, "could not allocate write bio page\n");
- flags &= ~PBLK_WRITTEN_DATA;
- flags |= PBLK_SUBMITTED_ENTRY;
- /* Release flags on context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
- return NVM_IO_ERR;
- }
-
- if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
- rb->seg_size) {
- pblk_err(pblk, "could not add page to write bio\n");
- flags &= ~PBLK_WRITTEN_DATA;
- flags |= PBLK_SUBMITTED_ENTRY;
- /* Release flags on context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
- return NVM_IO_ERR;
- }
-
- /* Entry is now part of the bio: WRITTEN_DATA -> SUBMITTED_ENTRY */
- flags &= ~PBLK_WRITTEN_DATA;
- flags |= PBLK_SUBMITTED_ENTRY;
-
- /* Release flags on context. Protect from writes */
- smp_store_release(&entry->w_ctx.flags, flags);
-
- pos = pblk_rb_ptr_wrap(rb, pos, 1);
- }
-
- if (pad) {
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
- pblk_err(pblk, "could not pad page in write bio\n");
- return NVM_IO_ERR;
- }
-
- /* Padding statistics: per-size histogram plus running total */
- if (pad < pblk->min_write_pgs)
- atomic64_inc(&pblk->pad_dist[pad - 1]);
- else
- pblk_warn(pblk, "padding more than min. sectors\n");
-
- atomic64_add(pad, &pblk->pad_wa);
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(pad, &pblk->padded_writes);
-#endif
-
- return NVM_IO_OK;
-}
-
-/*
- * Copy to bio only if the lba matches the one on the given cache entry.
- * Otherwise, it means that the entry has been overwritten, and the bio should
- * be directed to disk.
- *
- * The cache entry is selected by pblk_addr_to_cacheline(ppa). Under
- * rb->w_lock the current L2P mapping of lba is compared with ppa, and the
- * entry's lba and flags are checked, before rb->seg_size bytes are copied
- * into the bio's data.
- *
- * Returns 1 if the data was copied, 0 if the entry no longer holds lba.
- */
-int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- struct ppa_addr ppa)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_rb_entry *entry;
- struct pblk_w_ctx *w_ctx;
- struct ppa_addr l2p_ppa;
- u64 pos = pblk_addr_to_cacheline(ppa);
- void *data;
- int flags;
- int ret = 1;
-
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Caller must ensure that the access will not cause an overflow */
- BUG_ON(pos >= rb->nr_entries);
-#endif
- entry = &rb->entries[pos];
- w_ctx = &entry->w_ctx;
- /* NOTE(review): flags is sampled before rb->w_lock is taken - confirm this is intended */
- flags = READ_ONCE(w_ctx->flags);
-
- spin_lock(&rb->w_lock);
- /* trans_lock is nested inside w_lock only for the L2P lookup */
- spin_lock(&pblk->trans_lock);
- l2p_ppa = pblk_trans_map_get(pblk, lba);
- spin_unlock(&pblk->trans_lock);
-
- /* Check if the entry has been overwritten or is scheduled to be */
- if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
- flags & PBLK_WRITABLE_ENTRY) {
- ret = 0;
- goto out;
- }
- data = bio_data(bio);
- memcpy(data, entry->data, rb->seg_size);
-
-out:
- spin_unlock(&rb->w_lock);
- return ret;
-}
-
-/*
- * Return the write context of the ring buffer entry at pos. The position is
- * first passed through pblk_rb_ptr_wrap() with an offset of 0.
- */
-struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
-{
-	struct pblk_rb_entry *slot;
-
-	slot = &rb->entries[pblk_rb_ptr_wrap(rb, pos, 0)];
-
-	return &slot->w_ctx;
-}
-
-/*
- * Take rb->s_lock and return the current sync pointer. When flags is non-NULL
- * the interrupt state is saved in *flags (irqsave variant); otherwise the
- * plain irq-disabling variant is used.
- */
-unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
-	__acquires(&rb->s_lock)
-{
-	if (!flags)
-		spin_lock_irq(&rb->s_lock);
-	else
-		spin_lock_irqsave(&rb->s_lock, *flags);
-
-	return rb->sync;
-}
-
-/*
- * Drop rb->s_lock. flags selects the unlock variant: non-NULL restores the
- * interrupt state from *flags, NULL simply re-enables interrupts. The lock
- * must be held on entry.
- */
-void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
-	__releases(&rb->s_lock)
-{
-	lockdep_assert_held(&rb->s_lock);
-
-	if (!flags) {
-		spin_unlock_irq(&rb->s_lock);
-		return;
-	}
-
-	spin_unlock_irqrestore(&rb->s_lock, *flags);
-}
-
-/*
- * Move the sync pointer forward by nr_entries and return its new value. If a
- * flush point is set and lies fewer than nr_entries ahead of the old sync
- * pointer, it is cleared (set to EMPTY_ENTRY). Must be called with
- * rb->s_lock held.
- */
-unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
-{
-	unsigned int old_sync, new_sync, fp;
-
-	lockdep_assert_held(&rb->s_lock);
-
-	old_sync = READ_ONCE(rb->sync);
-	fp = READ_ONCE(rb->flush_point);
-
-	if (fp != EMPTY_ENTRY) {
-		unsigned int pending = pblk_rb_ring_count(fp, old_sync,
-							  rb->nr_entries);
-
-		/* Protect flush points */
-		if (pending < nr_entries)
-			smp_store_release(&rb->flush_point, EMPTY_ENTRY);
-	}
-
-	new_sync = pblk_rb_ptr_wrap(rb, old_sync, nr_entries);
-
-	/* Protect from counts */
-	smp_store_release(&rb->sync, new_sync);
-
-	return new_sync;
-}
-
-/*
- * Number of sectors that still have to be submitted to reach the current
- * flush point, or 0 when no flush point is set or enough entries have been
- * submitted already.
- */
-unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
-{
-	unsigned int fp, sync, subm;
-	unsigned int in_flight, needed;
-
-	/* Protect flush points */
-	fp = smp_load_acquire(&rb->flush_point);
-	if (fp == EMPTY_ENTRY)
-		return 0;
-
-	/* Protect syncs */
-	sync = smp_load_acquire(&rb->sync);
-	subm = READ_ONCE(rb->subm);
-
-	in_flight = pblk_rb_ring_count(subm, sync, rb->nr_entries);
-
-	/* The sync point itself counts as a sector to sync */
-	needed = pblk_rb_ring_count(fp, sync, rb->nr_entries) + 1;
-
-	if (in_flight >= needed)
-		return 0;
-
-	return needed - in_flight;
-}
-
-/*
- * Sanity check run with both rb->w_lock and rb->s_lock held.
- *
- * Returns 0 when all ring pointers coincide and no flush point is set.
- * Otherwise returns 1 if the entry array, or the data pointer of any entry,
- * is NULL, and 0 if every entry still has its data pointer.
- */
-int pblk_rb_tear_down_check(struct pblk_rb *rb)
-{
-	bool idle;
-	int ret = 0;
-	int i;
-
-	spin_lock(&rb->w_lock);
-	spin_lock_irq(&rb->s_lock);
-
-	idle = rb->mem == rb->subm &&
-	       rb->subm == rb->sync &&
-	       rb->sync == rb->l2p_update &&
-	       rb->flush_point == EMPTY_ENTRY;
-	if (idle)
-		goto unlock;
-
-	if (!rb->entries) {
-		ret = 1;
-		goto unlock;
-	}
-
-	for (i = 0; i < rb->nr_entries; i++) {
-		if (!rb->entries[i].data) {
-			ret = 1;
-			break;
-		}
-	}
-
-unlock:
-	spin_unlock_irq(&rb->s_lock);
-	spin_unlock(&rb->w_lock);
-
-	return ret;
-}
-
-/*
- * Mask pos with (nr_entries - 1). This equals pos modulo nr_entries only when
- * nr_entries is a power of two.
- */
-unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
-{
-	unsigned int mask = rb->nr_entries - 1;
-
-	return pos & mask;
-}
-
-/* Return 1 if pos lies outside the ring buffer (pos >= nr_entries), else 0. */
-int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
-{
-	if (pos < rb->nr_entries)
-		return 0;
-
-	return 1;
-}
-
-/*
- * Format a one-line snapshot of the ring buffer state into buf (at most
- * PAGE_SIZE bytes) and return the number of bytes written by scnprintf().
- *
- * Columns, in order: nr_entries, mem, subm, sync, l2p_update, in-flight flush
- * points (0 unless CONFIG_NVM_PBLK_DEBUG), flush point (printed as "NULL" when
- * it is EMPTY_ENTRY), then read count / free space / flush point count, and
- * finally the number of entries on pblk->compl_list.
- */
-ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
-{
- struct pblk *pblk = container_of(rb, struct pblk, rwb);
- struct pblk_c_ctx *c;
- ssize_t offset;
- int queued_entries = 0;
-
- /* Count the completion list entries under s_lock */
- spin_lock_irq(&rb->s_lock);
- list_for_each_entry(c, &pblk->compl_list, list)
- queued_entries++;
- spin_unlock_irq(&rb->s_lock);
-
- /* The remaining fields are read without holding a lock */
- if (rb->flush_point != EMPTY_ENTRY)
- offset = scnprintf(buf, PAGE_SIZE,
- "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
- rb->nr_entries,
- rb->mem,
- rb->subm,
- rb->sync,
- rb->l2p_update,
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_read(&rb->inflight_flush_point),
-#else
- 0,
-#endif
- rb->flush_point,
- pblk_rb_read_count(rb),
- pblk_rb_space(rb),
- pblk_rb_flush_point_count(rb),
- queued_entries);
- else
- offset = scnprintf(buf, PAGE_SIZE,
- "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
- rb->nr_entries,
- rb->mem,
- rb->subm,
- rb->sync,
- rb->l2p_update,
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_read(&rb->inflight_flush_point),
-#else
- 0,
-#endif
- pblk_rb_read_count(rb),
- pblk_rb_space(rb),
- pblk_rb_flush_point_count(rb),
- queued_entries);
-
- return offset;
-}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
deleted file mode 100644
index c28537a489bc..000000000000
--- a/drivers/lightnvm/pblk-read.c
+++ /dev/null
@@ -1,474 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-read.c - pblk's read path
- */
-
-#include "pblk.h"
-
-/*
- * The value read from the cache may have been updated in the meantime and now
- * live at a different cache location. What is guaranteed is that data returned
- * from the cache belongs to the mapped lba. A flush must be issued to enforce
- * ordering between writes and reads.
- *
- * Returns the result of pblk_rb_copy_to_bio().
- */
-static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
-				sector_t lba, struct ppa_addr ppa)
-{
-	struct pblk_rb *rwb = &pblk->rwb;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	/* Callers must ensure that the ppa points to a cache address */
-	BUG_ON(pblk_ppa_empty(ppa));
-	BUG_ON(!pblk_addr_in_cache(ppa));
-#endif
-
-	return pblk_rb_copy_to_bio(rwb, bio, lba, ppa);
-}
-
-/*
- * Multi-sector read setup: look up the L2P mapping of rqd->nr_ppas sectors
- * starting at blba into rqd->ppa_list and, when the lookup reports cache
- * addresses (*from_cache), copy those sectors from the write buffer into bio.
- *
- * Unmapped sectors get ADDR_EMPTY in their metadata lba; sectors served from
- * the cache get their lba. The bio is advanced by one exposed page per
- * processed sector.
- *
- * Returns the number of sectors handled: the value returned by
- * pblk_lookup_l2p_seq(), or the index of the first sector whose cache read
- * failed after the bio had already been advanced.
- */
-static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct bio *bio, sector_t blba,
- bool *from_cache)
-{
- void *meta_list = rqd->meta_list;
- int nr_secs, i;
-
-retry:
- nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
- from_cache);
-
- /* Nothing to copy from the write buffer */
- if (!*from_cache)
- goto end;
-
- for (i = 0; i < nr_secs; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- sector_t lba = blba + i;
-
- if (pblk_ppa_empty(rqd->ppa_list[i])) {
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- meta->lba = addr_empty;
- } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
- /*
- * Try to read from write buffer. The address is later
- * checked on the write buffer to prevent retrieving
- * overwritten data.
- */
- if (!pblk_read_from_cache(pblk, bio, lba,
- rqd->ppa_list[i])) {
- if (i == 0) {
- /*
- * We didn't call with bio_advance()
- * yet, so we can just retry.
- */
- goto retry;
- } else {
- /*
- * We already call bio_advance()
- * so we cannot retry and we need
- * to quit that function in order
- * to allow caller to handle the bio
- * splitting in the current sector
- * position.
- */
- nr_secs = i;
- goto end;
- }
- }
- meta->lba = cpu_to_le64(lba);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_inc(&pblk->cache_reads);
-#endif
- }
- bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
- }
-
-end:
- /* Flag the request as sequential when pblk_io_aligned() accepts nr_secs */
- if (pblk_io_aligned(pblk, nr_secs))
- rqd->is_seq = 1;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(nr_secs, &pblk->inflight_reads);
-#endif
-
- return nr_secs;
-}
-
-
-/*
- * Sanity check for a sequential read: the lba stored in the metadata of
- * sector i must be blba + i, unless it is ADDR_EMPTY. Mismatches are logged
- * and trigger a WARN. Nothing is checked when OOB metadata is not supported.
- */
-static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
-				sector_t blba)
-{
-	void *meta_list = rqd->meta_list;
-	int nr_lbas = rqd->nr_ppas;
-	int idx;
-
-	if (!pblk_is_oob_meta_supported(pblk))
-		return;
-
-	for (idx = 0; idx < nr_lbas; idx++) {
-		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, idx);
-		u64 read_lba = le64_to_cpu(meta->lba);
-
-		if (read_lba == ADDR_EMPTY || read_lba == blba + idx)
-			continue;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-		print_ppa(pblk, &nvm_rq_to_ppa_list(rqd)[idx], "seq", idx);
-#endif
-		pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
-			 read_lba, (u64)blba + idx);
-		WARN_ON(1);
-	}
-}
-
-/*
- * Sanity check for a random read. lba_list may contain holes (ADDR_EMPTY);
- * such entries are skipped and do not consume a metadata slot, so the metadata
- * index only advances for non-empty entries. Mismatches between the expected
- * and the stored lba are logged and trigger a WARN.
- */
-static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
-				 u64 *lba_list, int nr_lbas)
-{
-	void *meta_lba_list = rqd->meta_list;
-	int src, slot = 0;
-
-	if (!pblk_is_oob_meta_supported(pblk))
-		return;
-
-	for (src = 0; src < nr_lbas; src++) {
-		struct pblk_sec_meta *meta = pblk_get_meta(pblk,
-							   meta_lba_list, slot);
-		u64 expected = lba_list[src];
-		u64 found;
-
-		if (expected == ADDR_EMPTY)
-			continue;
-
-		found = le64_to_cpu(meta->lba);
-		if (found != expected) {
-#ifdef CONFIG_NVM_PBLK_DEBUG
-			print_ppa(pblk, &nvm_rq_to_ppa_list(rqd)[slot],
-				  "rnd", slot);
-#endif
-			pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
-				 found, expected);
-			WARN_ON(1);
-		}
-
-		slot++;
-	}
-
-	WARN_ONCE(slot != rqd->nr_ppas, "pblk: corrupted random request\n");
-}
-
-/*
- * Complete a user read bio. NVM_RSP_WARN_HIGHECC is not treated as a failure;
- * any other non-zero error completes the bio with an I/O error.
- */
-static void pblk_end_user_read(struct bio *bio, int error)
-{
-	bool failed = error && error != NVM_RSP_WARN_HIGHECC;
-
-	if (!failed) {
-		bio_endio(bio);
-		return;
-	}
-
-	bio_io_error(bio);
-}
-
-/*
- * Common read completion: close I/O accounting on the internal bio, log read
- * errors, verify the returned LBAs, release the internal bio, optionally drop
- * the line references (put_line), then free the request and decrement the
- * in-flight I/O counter.
- */
-static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
-			       bool put_line)
-{
-	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-	struct bio *int_bio = rqd->bio;
-
-	bio_end_io_acct(int_bio, r_ctx->start_time);
-
-	if (rqd->error)
-		pblk_log_read_err(pblk, rqd);
-
-	pblk_read_check_seq(pblk, rqd, r_ctx->lba);
-	bio_put(int_bio);
-
-	if (put_line)
-		pblk_rq_to_line_put(pblk, rqd);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
-	atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
-#endif
-
-	pblk_free_rqd(pblk, rqd, PBLK_READ);
-	atomic_dec(&pblk->inflight_io);
-}
-
-/*
- * end_io callback for reads: complete the original user bio stored in the
- * request context, then run the common completion with put_line set.
- */
-static void pblk_end_io_read(struct nvm_rq *rqd)
-{
-	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-	struct bio *user_bio = r_ctx->private;
-	struct pblk *pblk = rqd->private;
-
-	pblk_end_user_read(user_bio, rqd->error);
-	__pblk_end_io_read(pblk, rqd, true);
-}
-
-/*
- * Single-sector read setup. The lba is looked up in the L2P table and then:
- *  - unmapped: the metadata lba is set to ADDR_EMPTY;
- *  - mapped to the device: the address is stored in rqd->ppa_addr;
- *  - mapped to the write cache: the data is copied from the write buffer. If
- *    the copy is refused, the lookup is repeated and the decision re-evaluated.
- */
-static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
-			 sector_t lba, bool *from_cache)
-{
-	struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
-	struct ppa_addr ppa;
-
-	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_inc(&pblk->inflight_reads);
-#endif
-
-	for (;;) {
-		if (pblk_ppa_empty(ppa)) {
-			meta->lba = cpu_to_le64(ADDR_EMPTY);
-			return;
-		}
-
-		if (!pblk_addr_in_cache(ppa)) {
-			rqd->ppa_addr = ppa;
-			return;
-		}
-
-		/* Try to read from write buffer. The address is later checked
-		 * on the write buffer to prevent retrieving overwritten data.
-		 */
-		if (pblk_read_from_cache(pblk, bio, lba, ppa))
-			break;
-
-		pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
-	}
-
-	meta->lba = cpu_to_le64(lba);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_inc(&pblk->cache_reads);
-#endif
-}
-
-/*
- * Entry point for user reads.
- *
- * A read request is allocated for the bio and an internal clone of the bio is
- * used to resolve each sector either from the write cache or from the device:
- *  - everything was served from the cache: the bio is completed right away;
- *  - only a leading part could be resolved in one go: the bio is split, the
- *    remainder is resubmitted to the block layer and the leading part is
- *    handled with the existing request;
- *  - otherwise the request is submitted to the device and completed from
- *    pblk_end_io_read().
- *
- * I/O accounting is started on entry and closed on every completion path,
- * including the early metadata allocation failure.
- */
-void pblk_submit_read(struct pblk *pblk, struct bio *bio)
-{
-	sector_t blba = pblk_get_lba(bio);
-	unsigned int nr_secs = pblk_get_secs(bio);
-	bool from_cache;
-	struct pblk_g_ctx *r_ctx;
-	struct nvm_rq *rqd;
-	struct bio *int_bio, *split_bio;
-	unsigned long start_time;
-
-	start_time = bio_start_io_acct(bio);
-
-	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
-
-	rqd->opcode = NVM_OP_PREAD;
-	rqd->nr_ppas = nr_secs;
-	rqd->private = pblk;
-	rqd->end_io = pblk_end_io_read;
-
-	r_ctx = nvm_rq_to_pdu(rqd);
-	r_ctx->start_time = start_time;
-	r_ctx->lba = blba;
-
-	if (pblk_alloc_rqd_meta(pblk, rqd)) {
-		/* Balance bio_start_io_acct() above; the bio must not be
-		 * touched after bio_io_error() has completed it.
-		 */
-		bio_end_io_acct(bio, start_time);
-		bio_io_error(bio);
-		pblk_free_rqd(pblk, rqd, PBLK_READ);
-		return;
-	}
-
-	/* Clone read bio to deal internally with:
-	 * -read errors when reading from drive
-	 * -bio_advance() calls during cache reads
-	 */
-	int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
-
-	if (nr_secs > 1)
-		nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
-					       &from_cache);
-	else
-		pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
-
-split_retry:
-	r_ctx->private = bio; /* original bio */
-	rqd->bio = int_bio; /* internal bio */
-
-	if (from_cache && nr_secs == rqd->nr_ppas) {
-		/* All data was read from cache, we can complete the IO. */
-		pblk_end_user_read(bio, 0);
-		/* __pblk_end_io_read() decrements inflight_io; pre-increment
-		 * it since this request never went through submission.
-		 */
-		atomic_inc(&pblk->inflight_io);
-		__pblk_end_io_read(pblk, rqd, false);
-	} else if (nr_secs != rqd->nr_ppas) {
-		/* The read bio request could be partially filled by the write
-		 * buffer, but there are some holes that need to be read from
-		 * the drive. In order to handle this, we will use block layer
-		 * mechanism to split this request in to smaller ones and make
-		 * a chain of it.
-		 */
-		split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
-				      &pblk_bio_set);
-		bio_chain(split_bio, bio);
-		submit_bio_noacct(bio);
-
-		/* New bio contains first N sectors of the previous one, so
-		 * we can continue to use existing rqd, but we need to shrink
-		 * the number of PPAs in it. New bio is also guaranteed that
-		 * it contains only either data from cache or from drive, never
-		 * a mix of them.
-		 */
-		bio = split_bio;
-		rqd->nr_ppas = nr_secs;
-		if (rqd->nr_ppas == 1)
-			rqd->ppa_addr = rqd->ppa_list[0];
-
-		/* Recreate int_bio - existing might have some needed internal
-		 * fields modified already.
-		 */
-		bio_put(int_bio);
-		int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
-		goto split_retry;
-	} else if (pblk_submit_io(pblk, rqd, NULL)) {
-		/* Submitting IO to drive failed, let's report an error */
-		rqd->error = -ENODEV;
-		pblk_end_io_read(rqd);
-	}
-}
-
-/*
- * Build the PPA list for a multi-sector GC read. A sector is kept only if the
- * L2P table still maps its lba to the address GC expects (paddr_list_gc[i] on
- * this line); otherwise both list entries are set to ADDR_EMPTY so the sector
- * is skipped. Entries that are already ADDR_EMPTY are ignored.
- *
- * Returns the number of sectors placed in rqd->ppa_list.
- */
-static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
-			      struct pblk_line *line, u64 *lba_list,
-			      u64 *paddr_list_gc, unsigned int nr_secs)
-{
-	struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA];
-	int valid_secs = 0;
-	int i;
-
-	pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs);
-
-	for (i = 0; i < nr_secs; i++) {
-		struct ppa_addr ppa_gc;
-
-		if (lba_list[i] == ADDR_EMPTY)
-			continue;
-
-		ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id);
-		if (pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) {
-			rqd->ppa_list[valid_secs++] = ppa_list_l2p[i];
-			continue;
-		}
-
-		/* Mapping changed since GC picked this sector: drop it */
-		paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY;
-	}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_add(valid_secs, &pblk->inflight_reads);
-#endif
-
-	return valid_secs;
-}
-
-/*
- * Single-sector variant of the GC read setup. Returns 1 and stores the mapped
- * address in rqd->ppa_addr if the L2P table still maps lba to the address GC
- * expects; returns 0 if lba is ADDR_EMPTY, out of bounds, or has been
- * remapped.
- */
-static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
-		      struct pblk_line *line, sector_t lba,
-		      u64 paddr_gc)
-{
-	struct ppa_addr ppa_l2p, ppa_gc;
-
-	if (lba == ADDR_EMPTY)
-		return 0;
-
-	/* logic error: lba out-of-bounds */
-	if (lba >= pblk->capacity) {
-		WARN(1, "pblk: read lba out of bounds\n");
-		return 0;
-	}
-
-	spin_lock(&pblk->trans_lock);
-	ppa_l2p = pblk_trans_map_get(pblk, lba);
-	spin_unlock(&pblk->trans_lock);
-
-	ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id);
-	if (!pblk_ppa_comp(ppa_l2p, ppa_gc))
-		return 0;
-
-	rqd->ppa_addr = ppa_l2p;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_inc(&pblk->inflight_reads);
-#endif
-
-	return 1;
-}
-
-/*
- * Synchronously read the still-valid sectors of a GC request into
- * gc_rq->data. gc_rq->secs_to_gc is set to the number of sectors that are
- * still mapped where GC expects them; if none are left, no I/O is issued.
- *
- * Returns 0 on success (including the nothing-to-read case), the error from
- * pblk_alloc_rqd_meta(), or -EIO if the submission fails. A device-reported
- * read error (rqd.error) is only counted, not returned.
- */
-int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
-{
-	struct nvm_rq rqd;
-	int ret;
-
-	memset(&rqd, 0, sizeof(struct nvm_rq));
-
-	ret = pblk_alloc_rqd_meta(pblk, &rqd);
-	if (ret)
-		return ret;
-
-	if (gc_rq->nr_secs > 1) {
-		gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line,
-						       gc_rq->lba_list,
-						       gc_rq->paddr_list,
-						       gc_rq->nr_secs);
-		/* A single surviving sector uses the inline address field */
-		if (gc_rq->secs_to_gc == 1)
-			rqd.ppa_addr = rqd.ppa_list[0];
-	} else {
-		gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line,
-					       gc_rq->lba_list[0],
-					       gc_rq->paddr_list[0]);
-	}
-
-	if (!gc_rq->secs_to_gc)
-		goto free_meta;
-
-	rqd.opcode = NVM_OP_PREAD;
-	rqd.nr_ppas = gc_rq->secs_to_gc;
-
-	if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) {
-		ret = -EIO;
-		goto free_meta;
-	}
-
-	pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
-
-	atomic_dec(&pblk->inflight_io);
-
-	if (rqd.error) {
-		atomic_long_inc(&pblk->read_failed_gc);
-#ifdef CONFIG_NVM_PBLK_DEBUG
-		pblk_print_failed_rqd(pblk, &rqd, rqd.error);
-#endif
-	}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-	atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads);
-	atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads);
-	atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads);
-#endif
-
-free_meta:
-	pblk_free_rqd_meta(pblk, &rqd);
-	return ret;
-}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
deleted file mode 100644
index 0e6f0c76e930..000000000000
--- a/drivers/lightnvm/pblk-recovery.c
+++ /dev/null
@@ -1,874 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial: Javier Gonzalez <javier@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-recovery.c - pblk's recovery path
- *
- * The L2P recovery path is single threaded as the L2P table is updated in order
- * following the line sequence ID.
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-/*
- * Validate an emeta buffer: its stored CRC must match the computed one and
- * its header identifier must be PBLK_MAGIC.
- *
- * Returns 0 if the buffer is valid, 1 otherwise.
- */
-int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
-{
-	u32 computed_crc = pblk_calc_emeta_crc(pblk, emeta_buf);
-
-	if (le32_to_cpu(emeta_buf->crc) != computed_crc)
-		return 1;
-
-	return le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC;
-}
-
-/*
- * Rebuild the L2P entries of a line from the lba list stored in its emeta.
- *
- * Every sector between the end of smeta and the start of emeta is visited:
- * sectors on bad blocks are skipped, sectors recorded as ADDR_EMPTY are marked
- * invalid on the line, and all others are mapped through pblk_update_map().
- * A mismatch between the number of mapped lbas and the count stored in emeta
- * is logged but does not fail the recovery.
- *
- * Returns 0 on success, 1 if the emeta buffer provides no lba list.
- */
-static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_emeta *emeta = line->emeta;
- struct line_emeta *emeta_buf = emeta->buf;
- __le64 *lba_list;
- u64 data_start, data_end;
- u64 nr_valid_lbas, nr_lbas = 0;
- u64 i;
-
- lba_list = emeta_to_lbas(pblk, emeta_buf);
- if (!lba_list)
- return 1;
-
- /* Data sectors live between the end of smeta and the start of emeta */
- data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- data_end = line->emeta_ssec;
- nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
-
- for (i = data_start; i < data_end; i++) {
- struct ppa_addr ppa;
- int pos;
-
- ppa = addr_to_gen_ppa(pblk, i, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- /* Do not update bad blocks */
- if (test_bit(pos, line->blk_bitmap))
- continue;
-
- if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
- /* Sector holds no user data: invalidate it and drop one from vsc */
- spin_lock(&line->lock);
- if (test_and_set_bit(i, line->invalid_bitmap))
- WARN_ONCE(1, "pblk: rec. double invalidate:\n");
- else
- le32_add_cpu(line->vsc, -1);
- spin_unlock(&line->lock);
-
- continue;
- }
-
- pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
- nr_lbas++;
- }
-
- if (nr_valid_lbas != nr_lbas)
- pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n",
- line->id, nr_valid_lbas, nr_lbas);
-
- line->left_msecs = 0;
-
- return 0;
-}
-
-/*
- * Replay the write pointer of a recovered line: one __pblk_alloc_page() call
- * of min_write_pgs sectors is made per min_write_pgs sectors already written,
- * and line->left_msecs is reduced accordingly under l_mg->free_lock.
- *
- * The loop counter has the same 64-bit unsigned type as written_secs so that
- * the comparison involves no signed/unsigned conversion and the counter
- * cannot overflow for large sector counts.
- */
-static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
-				u64 written_secs)
-{
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	u64 i;
-
-	for (i = 0; i < written_secs; i += pblk->min_write_pgs)
-		__pblk_alloc_page(pblk, line, pblk->min_write_pgs);
-
-	spin_lock(&l_mg->free_lock);
-	if (written_secs > line->left_msecs) {
-		/*
-		 * We have all data sectors written
-		 * and some emeta sectors written too.
-		 */
-		line->left_msecs = 0;
-	} else {
-		/* We have only some data sectors written. */
-		line->left_msecs -= written_secs;
-	}
-	spin_unlock(&l_mg->free_lock);
-}
-
-/*
- * Sum the write pointers of all chunks of the line that are not offline and
- * update the line's write pointer with that total minus the smeta sectors. An
- * error is logged if the number of usable chunks disagrees with the line's
- * bad block bitmap.
- *
- * Returns the total number of written sectors (smeta included).
- */
-static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
-{
-	struct pblk_line_meta *lm = &pblk->lm;
-	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
-	u64 written_secs = 0;
-	int usable_chunks = 0;
-	int idx;
-
-	for (idx = 0; idx < lm->blk_per_line; idx++) {
-		struct nvm_chk_meta *chunk = &line->chks[idx];
-
-		if (!(chunk->state & NVM_CHK_ST_OFFLINE)) {
-			written_secs += chunk->wp;
-			usable_chunks++;
-		}
-	}
-
-	if (usable_chunks != lm->blk_per_line - nr_bb)
-		pblk_err(pblk, "recovery line %d is bad\n", line->id);
-
-	pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
-
-	return written_secs;
-}
-
-/*
- * Buffers shared by the OOB recovery scan. They are allocated once in
- * pblk_recov_l2p_from_oob() and passed by value to pblk_recov_scan_oob().
- */
-struct pblk_recov_alloc {
- struct ppa_addr *ppa_list; /* PPA list, placed right after the metadata area */
- void *meta_list; /* per-sector metadata buffer (DMA allocation) */
- struct nvm_rq *rqd; /* request reused for every read of the scan */
- void *data; /* data buffer for the sectors read */
- dma_addr_t dma_ppa_list; /* DMA address of ppa_list */
- dma_addr_t dma_meta_list; /* DMA address of meta_list */
-};
-
-/*
- * kref release callback of a pad request: signal the completion that
- * pblk_recov_pad_line() waits on.
- */
-static void pblk_recov_complete(struct kref *ref)
-{
-	struct pblk_pad_rq *pad_rq;
-
-	pad_rq = container_of(ref, struct pblk_pad_rq, ref);
-	complete(&pad_rq->wait);
-}
-
-/*
- * end_io callback of a padding write: release the chunk taken for the first
- * PPA, free the request, decrement the in-flight I/O counter and drop this
- * request's reference on the pad request.
- */
-static void pblk_end_io_recov(struct nvm_rq *rqd)
-{
-	struct pblk_pad_rq *pad_rq = rqd->private;
-	struct pblk *pblk = pad_rq->pblk;
-	struct ppa_addr *ppas = nvm_rq_to_ppa_list(rqd);
-
-	pblk_up_chunk(pblk, ppas[0]);
-	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-
-	atomic_dec(&pblk->inflight_io);
-	kref_put(&pad_rq->ref, pblk_recov_complete);
-}
-
-/*
- * Pad line using line bitmap.
- *
- * Up to left_ppas sectors of the line are written with empty data, in
- * requests sized by pblk_calc_secs(). Every padded sector is invalidated in
- * the map and recorded as ADDR_EMPTY in both the emeta lba list and the
- * request metadata. Pages that fall on bad blocks are skipped.
- *
- * The writes are asynchronous; pad_rq->ref holds one reference per in-flight
- * request plus the initial one, and the function waits until all of them have
- * been dropped before returning.
- *
- * Returns 0 or a negative error code.
- * NOTE(review): the "corrupted pad line" exit does not set ret, so the value
- * returned on that path is whatever ret held before - confirm this is intended.
- */
-static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
- int left_ppas)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- void *meta_list;
- struct pblk_pad_rq *pad_rq;
- struct nvm_rq *rqd;
- struct ppa_addr *ppa_list;
- void *data;
- __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
- u64 w_ptr = line->cur_sec;
- int left_line_ppas, rq_ppas;
- int i, j;
- int ret = 0;
-
- spin_lock(&line->lock);
- left_line_ppas = line->left_msecs;
- spin_unlock(&line->lock);
-
- pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
- if (!pad_rq)
- return -ENOMEM;
-
- /* Zeroed buffer used as the payload of every padding write */
- data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs));
- if (!data) {
- ret = -ENOMEM;
- goto free_rq;
- }
-
- pad_rq->pblk = pblk;
- init_completion(&pad_rq->wait);
- kref_init(&pad_rq->ref);
-
-next_pad_rq:
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
- if (rq_ppas < pblk->min_write_pgs) {
- pblk_err(pblk, "corrupted pad line %d\n", line->id);
- goto fail_complete;
- }
-
- rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
-
- ret = pblk_alloc_rqd_meta(pblk, rqd);
- if (ret) {
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
- goto fail_complete;
- }
-
- rqd->bio = NULL;
- rqd->opcode = NVM_OP_PWRITE;
- rqd->is_seq = 1;
- rqd->nr_ppas = rq_ppas;
- rqd->end_io = pblk_end_io_recov;
- rqd->private = pad_rq;
-
- ppa_list = nvm_rq_to_ppa_list(rqd);
- meta_list = rqd->meta_list;
-
- /* i is advanced by the inner loop, min_write_pgs sectors at a time */
- for (i = 0; i < rqd->nr_ppas; ) {
- struct ppa_addr ppa;
- int pos;
-
- w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
- ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- /* Skip pages that fall on bad blocks */
- while (test_bit(pos, line->blk_bitmap)) {
- w_ptr += pblk->min_write_pgs;
- ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
- struct ppa_addr dev_ppa;
- struct pblk_sec_meta *meta;
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
-
- dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
-
- pblk_map_invalidate(pblk, dev_ppa);
- lba_list[w_ptr] = addr_empty;
- meta = pblk_get_meta(pblk, meta_list, i);
- meta->lba = addr_empty;
- ppa_list[i] = dev_ppa;
- }
- }
-
- /* One reference per in-flight request, dropped in pblk_end_io_recov() */
- kref_get(&pad_rq->ref);
- pblk_down_chunk(pblk, ppa_list[0]);
-
- ret = pblk_submit_io(pblk, rqd, data);
- if (ret) {
- pblk_err(pblk, "I/O submission failed: %d\n", ret);
- pblk_up_chunk(pblk, ppa_list[0]);
- kref_put(&pad_rq->ref, pblk_recov_complete);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
- goto fail_complete;
- }
-
- left_line_ppas -= rq_ppas;
- left_ppas -= rq_ppas;
- if (left_ppas && left_line_ppas)
- goto next_pad_rq;
-
-fail_complete:
- /* Drop the initial reference and wait for outstanding requests */
- kref_put(&pad_rq->ref, pblk_recov_complete);
- wait_for_completion(&pad_rq->wait);
-
- if (!pblk_line_is_full(line))
- pblk_err(pblk, "corrupted padded line: %d\n", line->id);
-
- vfree(data);
-free_rq:
- kfree(pad_rq);
- return ret;
-}
-
-/*
- * Number of sectors to pad: mw_cunits * all_luns * ws_opt from the device
- * geometry, capped at the sectors left in the line.
- */
-static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
-{
-	struct nvm_geo *geo = &pblk->dev->geo;
-	int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt;
-
-	if (distance > line->left_msecs)
-		return line->left_msecs;
-
-	return distance;
-}
-
-/*
- * Return the chunk of a line selected by stripe (write order) index: the
- * index picks an entry of pblk->luns, whose base PPA is translated into a
- * position in line->chks.
- */
-static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
-						  struct pblk_line *line,
-						  int index)
-{
-	struct nvm_geo *geo = &pblk->dev->geo;
-	struct ppa_addr lun_ppa = pblk->luns[index].bppa;
-
-	return &line->chks[pblk_ppa_to_pos(geo, lun_ppa)];
-}
-
-/*
- * Check whether the write pointers of the good chunks of a line are out of
- * balance. The first good chunk sets the reference: every other good chunk
- * must have a write pointer no greater than the reference and no more than
- * max_write_pgs below it.
- *
- * Returns 1 if some chunk falls outside that window, 0 otherwise.
- */
-static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
-					struct pblk_line *line)
-{
-	struct pblk_line_meta *lm = &pblk->lm;
-	int nr_blks = lm->blk_per_line;
-	struct nvm_chk_meta *chunk;
-	u64 wp_hi, wp_lo;
-	int i;
-
-	i = find_first_zero_bit(line->blk_bitmap, nr_blks);
-
-	/* If there is one or zero good chunks in the line,
-	 * the write pointers can't be unbalanced.
-	 */
-	if (i >= (nr_blks - 1))
-		return 0;
-
-	chunk = pblk_get_stripe_chunk(pblk, line, i);
-	wp_hi = chunk->wp;
-	wp_lo = 0;
-	if (wp_hi > pblk->max_write_pgs)
-		wp_lo = wp_hi - pblk->max_write_pgs;
-
-	for (i = find_next_zero_bit(line->blk_bitmap, nr_blks, i + 1);
-	     i < nr_blks;
-	     i = find_next_zero_bit(line->blk_bitmap, nr_blks, i + 1)) {
-		chunk = pblk_get_stripe_chunk(pblk, line, i);
-		if (chunk->wp > wp_hi || chunk->wp < wp_lo)
-			return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Recover the L2P entries of an open line by reading its written sectors and
- * taking the lba of each sector from the per-sector metadata.
- *
- * The buffers in p are reused for every read. Sectors are read in requests
- * sized by pblk_calc_secs(), skipping pages on bad blocks. Each lba read is
- * stored in the emeta lba list; valid lbas are mapped via pblk_update_map().
- * If a read fails, the line is padded once and the same request retried; a
- * second failure aborts with -EINTR.
- *
- * Returns 0 on success or a negative error code.
- */
-static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr *ppa_list;
- void *meta_list;
- struct nvm_rq *rqd;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list;
- u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- bool padded = false;
- int rq_ppas;
- int i, j;
- int ret;
- /* Written data sectors, i.e. everything written minus smeta */
- u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
-
- if (pblk_line_wps_are_unbalanced(pblk, line))
- pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
-
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
-
- lba_list = emeta_to_lbas(pblk, line->emeta->buf);
-
-next_rq:
- memset(rqd, 0, pblk_g_rq_size);
-
- rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
-
-retry_rq:
- rqd->bio = NULL;
- rqd->opcode = NVM_OP_PREAD;
- rqd->meta_list = meta_list;
- rqd->nr_ppas = rq_ppas;
- rqd->ppa_list = ppa_list;
- rqd->dma_ppa_list = dma_ppa_list;
- rqd->dma_meta_list = dma_meta_list;
- ppa_list = nvm_rq_to_ppa_list(rqd);
-
- if (pblk_io_aligned(pblk, rq_ppas))
- rqd->is_seq = 1;
-
- /* NOTE(review): paddr is not advanced per inner iteration here; it only moves in the bad-block skip and in the lba loop below - confirm for nr_ppas > min_write_pgs */
- for (i = 0; i < rqd->nr_ppas; ) {
- struct ppa_addr ppa;
- int pos;
-
- ppa = addr_to_gen_ppa(pblk, paddr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
-
- /* Skip pages that fall on bad blocks */
- while (test_bit(pos, line->blk_bitmap)) {
- paddr += pblk->min_write_pgs;
- ppa = addr_to_gen_ppa(pblk, paddr, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- }
-
- for (j = 0; j < pblk->min_write_pgs; j++, i++)
- ppa_list[i] =
- addr_to_gen_ppa(pblk, paddr + j, line->id);
- }
-
- ret = pblk_submit_io_sync(pblk, rqd, data);
- if (ret) {
- pblk_err(pblk, "I/O submission failed: %d\n", ret);
- return ret;
- }
-
- atomic_dec(&pblk->inflight_io);
-
- /* If a read fails, do a best effort by padding the line and retrying */
- if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
- int pad_distance, ret; /* note: this ret shadows the function-level ret */
-
- if (padded) {
- pblk_log_read_err(pblk, rqd);
- return -EINTR;
- }
-
- pad_distance = pblk_pad_distance(pblk, line);
- ret = pblk_recov_pad_line(pblk, line, pad_distance);
- if (ret) {
- return ret;
- }
-
- padded = true;
- goto retry_rq;
- }
-
- pblk_get_packed_meta(pblk, rqd);
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
- u64 lba = le64_to_cpu(meta->lba);
-
- /* Record every lba read, valid or not, in the emeta lba list */
- lba_list[paddr++] = cpu_to_le64(lba);
-
- if (lba == ADDR_EMPTY || lba >= pblk->capacity)
- continue;
-
- line->nr_valid_lbas++;
- pblk_update_map(pblk, lba, ppa_list[i]);
- }
-
- /* NOTE(review): left_ppas is unsigned; if rq_ppas can exceed it (e.g. the min_write_pgs fallback above) this subtraction wraps and the loop continues - confirm */
- left_ppas -= rq_ppas;
- if (left_ppas > 0)
- goto next_rq;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ON(padded && !pblk_line_is_full(line));
-#endif
-
- return 0;
-}
-
-/*
- * Recover the L2P entries of a line from the lbas stored in its per-sector
- * metadata (OOB area).
- *
- * Allocates the buffers needed by the scan - one DMA area holding the
- * metadata followed by the PPA list, a data buffer and a read request - runs
- * pblk_recov_scan_oob() and closes the line if it turned out to be full.
- *
- * Returns 0 on success, -ENOMEM on allocation failure, or the error returned
- * by the scan.
- */
-static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct nvm_rq *rqd;
-	struct ppa_addr *ppa_list;
-	void *meta_list;
-	struct pblk_recov_alloc p;
-	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
-	int ret = 0;
-
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
-	if (!meta_list)
-		return -ENOMEM;
-
-	/* The PPA list shares the DMA allocation, right after the metadata */
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
-
-	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
-	if (!data) {
-		ret = -ENOMEM;
-		goto free_meta_list;
-	}
-
-	rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
-	memset(rqd, 0, pblk_g_rq_size);
-
-	p.ppa_list = ppa_list;
-	p.meta_list = meta_list;
-	p.rqd = rqd;
-	p.data = data;
-	p.dma_ppa_list = dma_ppa_list;
-	p.dma_meta_list = dma_meta_list;
-
-	ret = pblk_recov_scan_oob(pblk, line, p);
-	if (ret) {
-		pblk_err(pblk, "could not recover L2P from OOB\n");
-		goto out;
-	}
-
-	if (pblk_line_is_full(line))
-		pblk_line_recov_close(pblk, line);
-
-out:
-	mempool_free(rqd, &pblk->r_rq_pool);
-	kfree(data);
-free_meta_list:
-	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-
-	return ret;
-}
-
-/*
- * Insert line into the list at head, keeping the list ordered by ascending
- * sequence number (seq_nr). The line goes right before the first entry with a
- * larger seq_nr, or at the tail if there is none.
- *
- * The insertion point is tracked as a list_head so the loop cursor is never
- * dereferenced after the loop has run to completion, where it no longer
- * points at a real struct pblk_line.
- */
-static void pblk_recov_line_add_ordered(struct list_head *head,
-					struct pblk_line *line)
-{
-	struct list_head *insert_before = head;
-	struct pblk_line *t;
-
-	list_for_each_entry(t, head, list) {
-		if (t->seq_nr > line->seq_nr) {
-			insert_before = &t->list;
-			break;
-		}
-	}
-
-	/* list_add_tail() links the new entry immediately before its 2nd arg */
-	list_add_tail(&line->list, insert_before);
-}
-
-static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- unsigned int emeta_secs;
- u64 emeta_start;
- struct ppa_addr ppa;
- int pos;
-
- emeta_secs = lm->emeta_sec[0];
- emeta_start = lm->sec_per_line;
-
- while (emeta_secs) {
- emeta_start--;
- ppa = addr_to_gen_ppa(pblk, emeta_start, line->id);
- pos = pblk_ppa_to_pos(geo, ppa);
- if (!test_bit(pos, line->blk_bitmap))
- emeta_secs--;
- }
-
- return emeta_start;
-}
-
-static int pblk_recov_check_line_version(struct pblk *pblk,
- struct line_emeta *emeta)
-{
- struct line_header *header = &emeta->header;
-
- if (header->version_major != EMETA_VERSION_MAJOR) {
- pblk_err(pblk, "line major version mismatch: %d, expected: %d\n",
- header->version_major, EMETA_VERSION_MAJOR);
- return 1;
- }
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if (header->version_minor > EMETA_VERSION_MINOR)
- pblk_info(pblk, "newer line minor version found: %d\n",
- header->version_minor);
-#endif
-
- return 0;
-}
-
-static void pblk_recov_wa_counters(struct pblk *pblk,
- struct line_emeta *emeta)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct line_header *header = &emeta->header;
- struct wa_counters *wa = emeta_to_wa(lm, emeta);
-
- /* WA counters were introduced in emeta version 0.2 */
- if (header->version_major > 0 || header->version_minor >= 2) {
- u64 user = le64_to_cpu(wa->user);
- u64 pad = le64_to_cpu(wa->pad);
- u64 gc = le64_to_cpu(wa->gc);
-
- atomic64_set(&pblk->user_wa, user);
- atomic64_set(&pblk->pad_wa, pad);
- atomic64_set(&pblk->gc_wa, gc);
-
- pblk->user_rst_wa = user;
- pblk->pad_rst_wa = pad;
- pblk->gc_rst_wa = gc;
- }
-}
-
-static int pblk_line_was_written(struct pblk_line *line,
- struct pblk *pblk)
-{
-
- struct pblk_line_meta *lm = &pblk->lm;
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct nvm_chk_meta *chunk;
- struct ppa_addr bppa;
- int smeta_blk;
-
- if (line->state == PBLK_LINESTATE_BAD)
- return 0;
-
- smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
- if (smeta_blk >= lm->blk_per_line)
- return 0;
-
- bppa = pblk->luns[smeta_blk].bppa;
- chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
-
- if (chunk->state & NVM_CHK_ST_CLOSED ||
- (chunk->state & NVM_CHK_ST_OPEN
- && chunk->wp >= lm->smeta_sec))
- return 1;
-
- return 0;
-}
-
-static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- int i;
-
- for (i = 0; i < lm->blk_per_line; i++)
- if (line->chks[i].state & NVM_CHK_ST_OPEN)
- return true;
-
- return false;
-}
-
-struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line, *tline, *data_line = NULL;
- struct pblk_smeta *smeta;
- struct pblk_emeta *emeta;
- struct line_smeta *smeta_buf;
- int found_lines = 0, recovered_lines = 0, open_lines = 0;
- int is_next = 0;
- int meta_line;
- int i, valid_uuid = 0;
- LIST_HEAD(recov_list);
-
- /* TODO: Implement FTL snapshot */
-
- /* Scan recovery - takes place when FTL snapshot fails */
- spin_lock(&l_mg->free_lock);
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line];
- emeta = l_mg->eline_meta[meta_line];
- smeta_buf = (struct line_smeta *)smeta;
- spin_unlock(&l_mg->free_lock);
-
- /* Order data lines using their sequence number */
- for (i = 0; i < l_mg->nr_lines; i++) {
- u32 crc;
-
- line = &pblk->lines[i];
-
- memset(smeta, 0, lm->smeta_len);
- line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta_buf)) +
- sizeof(struct line_smeta);
-
- if (!pblk_line_was_written(line, pblk))
- continue;
-
- /* Lines that cannot be read are assumed as not written here */
- if (pblk_line_smeta_read(pblk, line))
- continue;
-
- crc = pblk_calc_smeta_crc(pblk, smeta_buf);
- if (le32_to_cpu(smeta_buf->crc) != crc)
- continue;
-
- if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
- continue;
-
- if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
- pblk_err(pblk, "found incompatible line version %u\n",
- smeta_buf->header.version_major);
- return ERR_PTR(-EINVAL);
- }
-
- /* The first valid instance uuid is used for initialization */
- if (!valid_uuid) {
- import_guid(&pblk->instance_uuid, smeta_buf->header.uuid);
- valid_uuid = 1;
- }
-
- if (!guid_equal(&pblk->instance_uuid,
- (guid_t *)&smeta_buf->header.uuid)) {
- pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
- i);
- continue;
- }
-
- /* Update line metadata */
- spin_lock(&line->lock);
- line->id = le32_to_cpu(smeta_buf->header.id);
- line->type = le16_to_cpu(smeta_buf->header.type);
- line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
- spin_unlock(&line->lock);
-
- /* Update general metadata */
- spin_lock(&l_mg->free_lock);
- if (line->seq_nr >= l_mg->d_seq_nr)
- l_mg->d_seq_nr = line->seq_nr + 1;
- l_mg->nr_free_lines--;
- spin_unlock(&l_mg->free_lock);
-
- if (pblk_line_recov_alloc(pblk, line))
- goto out;
-
- pblk_recov_line_add_ordered(&recov_list, line);
- found_lines++;
- pblk_debug(pblk, "recovering data line %d, seq:%llu\n",
- line->id, smeta_buf->seq_nr);
- }
-
- if (!found_lines) {
- guid_gen(&pblk->instance_uuid);
-
- spin_lock(&l_mg->free_lock);
- WARN_ON_ONCE(!test_and_clear_bit(meta_line,
- &l_mg->meta_bitmap));
- spin_unlock(&l_mg->free_lock);
-
- goto out;
- }
-
- /* Verify closed blocks and recover this portion of L2P table*/
- list_for_each_entry_safe(line, tline, &recov_list, list) {
- recovered_lines++;
-
- line->emeta_ssec = pblk_line_emeta_start(pblk, line);
- line->emeta = emeta;
- memset(line->emeta->buf, 0, lm->emeta_len[0]);
-
- if (pblk_line_is_open(pblk, line)) {
- pblk_recov_l2p_from_oob(pblk, line);
- goto next;
- }
-
- if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
- pblk_recov_l2p_from_oob(pblk, line);
- goto next;
- }
-
- if (pblk_recov_check_emeta(pblk, line->emeta->buf)) {
- pblk_recov_l2p_from_oob(pblk, line);
- goto next;
- }
-
- if (pblk_recov_check_line_version(pblk, line->emeta->buf))
- return ERR_PTR(-EINVAL);
-
- pblk_recov_wa_counters(pblk, line->emeta->buf);
-
- if (pblk_recov_l2p_from_emeta(pblk, line))
- pblk_recov_l2p_from_oob(pblk, line);
-
-next:
- if (pblk_line_is_full(line)) {
- struct list_head *move_list;
-
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_CLOSED;
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
- move_list = pblk_line_gc_list(pblk, line);
- spin_unlock(&line->lock);
-
- spin_lock(&l_mg->gc_lock);
- list_move_tail(&line->list, move_list);
- spin_unlock(&l_mg->gc_lock);
-
- mempool_free(line->map_bitmap, l_mg->bitmap_pool);
- line->map_bitmap = NULL;
- line->smeta = NULL;
- line->emeta = NULL;
- } else {
- spin_lock(&line->lock);
- line->state = PBLK_LINESTATE_OPEN;
- spin_unlock(&line->lock);
-
- line->emeta->mem = 0;
- atomic_set(&line->emeta->sync, 0);
-
- trace_pblk_line_state(pblk_disk_name(pblk), line->id,
- line->state);
-
- data_line = line;
- line->meta_line = meta_line;
-
- open_lines++;
- }
- }
-
- if (!open_lines) {
- spin_lock(&l_mg->free_lock);
- WARN_ON_ONCE(!test_and_clear_bit(meta_line,
- &l_mg->meta_bitmap));
- spin_unlock(&l_mg->free_lock);
- } else {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = data_line;
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
- spin_unlock(&l_mg->free_lock);
- }
-
- if (is_next)
- pblk_line_erase(pblk, l_mg->data_next);
-
-out:
- if (found_lines != recovered_lines)
- pblk_err(pblk, "failed to recover all found lines %d/%d\n",
- found_lines, recovered_lines);
-
- return data_line;
-}
-
-/*
- * Pad current line
- */
-int pblk_recov_pad(struct pblk *pblk)
-{
- struct pblk_line *line;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- int left_msecs;
- int ret = 0;
-
- spin_lock(&l_mg->free_lock);
- line = l_mg->data_line;
- left_msecs = line->left_msecs;
- spin_unlock(&l_mg->free_lock);
-
- ret = pblk_recov_pad_line(pblk, line, left_msecs);
- if (ret) {
- pblk_err(pblk, "tear down padding failed (%d)\n", ret);
- return ret;
- }
-
- pblk_line_close_meta(pblk, line);
- return ret;
-}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
deleted file mode 100644
index a5f8bc2defbc..000000000000
--- a/drivers/lightnvm/pblk-rl.c
+++ /dev/null
@@ -1,254 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-rl.c - pblk's rate limiter for user I/O
- *
- */
-
-#include "pblk.h"
-
-static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
-{
- mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
-}
-
-int pblk_rl_is_limit(struct pblk_rl *rl)
-{
- int rb_space;
-
- rb_space = atomic_read(&rl->rb_space);
-
- return (rb_space == 0);
-}
-
-int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
-{
- int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
- int rb_space = atomic_read(&rl->rb_space);
-
- if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
- return NVM_IO_ERR;
-
- if (rb_user_cnt >= rl->rb_user_max)
- return NVM_IO_REQUEUE;
-
- return NVM_IO_OK;
-}
-
-void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
-{
- int rb_space = atomic_read(&rl->rb_space);
-
- if (unlikely(rb_space >= 0))
- atomic_sub(nr_entries, &rl->rb_space);
-}
-
-int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
-{
- int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
- int rb_user_active;
-
- /* If there is no user I/O let GC take over space on the write buffer */
- rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
-}
-
-void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
-{
- atomic_add(nr_entries, &rl->rb_user_cnt);
-
- /* Release user I/O state. Protect from GC */
- smp_store_release(&rl->rb_user_active, 1);
- pblk_rl_kick_u_timer(rl);
-}
-
-void pblk_rl_werr_line_in(struct pblk_rl *rl)
-{
- atomic_inc(&rl->werr_lines);
-}
-
-void pblk_rl_werr_line_out(struct pblk_rl *rl)
-{
- atomic_dec(&rl->werr_lines);
-}
-
-void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
-{
- atomic_add(nr_entries, &rl->rb_gc_cnt);
-}
-
-void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
-{
- atomic_sub(nr_user, &rl->rb_user_cnt);
- atomic_sub(nr_gc, &rl->rb_gc_cnt);
-}
-
-unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
-{
- return atomic_read(&rl->free_blocks);
-}
-
-unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
-{
- return atomic_read(&rl->free_user_blocks);
-}
-
-static void __pblk_rl_update_rates(struct pblk_rl *rl,
- unsigned long free_blocks)
-{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
- int max = rl->rb_budget;
- int werr_gc_needed = atomic_read(&rl->werr_lines);
-
- if (free_blocks >= rl->high) {
- if (werr_gc_needed) {
- /* Allocate a small budget for recovering
- * lines with write errors
- */
- rl->rb_gc_max = 1 << rl->rb_windows_pw;
- rl->rb_user_max = max - rl->rb_gc_max;
- rl->rb_state = PBLK_RL_WERR;
- } else {
- rl->rb_user_max = max;
- rl->rb_gc_max = 0;
- rl->rb_state = PBLK_RL_OFF;
- }
- } else if (free_blocks < rl->high) {
- int shift = rl->high_pw - rl->rb_windows_pw;
- int user_windows = free_blocks >> shift;
- int user_max = user_windows << ilog2(NVM_MAX_VLBA);
-
- rl->rb_user_max = user_max;
- rl->rb_gc_max = max - user_max;
-
- if (free_blocks <= rl->rsv_blocks) {
- rl->rb_user_max = 0;
- rl->rb_gc_max = max;
- }
-
- /* In the worst case, we will need to GC lines in the low list
- * (high valid sector count). If there are lines to GC on high
- * or mid lists, these will be prioritized
- */
- rl->rb_state = PBLK_RL_LOW;
- }
-
- if (rl->rb_state != PBLK_RL_OFF)
- pblk_gc_should_start(pblk);
- else
- pblk_gc_should_stop(pblk);
-}
-
-void pblk_rl_update_rates(struct pblk_rl *rl)
-{
- __pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
-}
-
-void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
-{
- int blk_in_line = atomic_read(&line->blk_in_line);
- int free_blocks;
-
- atomic_add(blk_in_line, &rl->free_blocks);
- free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
-
- __pblk_rl_update_rates(rl, free_blocks);
-}
-
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
- bool used)
-{
- int blk_in_line = atomic_read(&line->blk_in_line);
- int free_blocks;
-
- atomic_sub(blk_in_line, &rl->free_blocks);
-
- if (used)
- free_blocks = atomic_sub_return(blk_in_line,
- &rl->free_user_blocks);
- else
- free_blocks = atomic_read(&rl->free_user_blocks);
-
- __pblk_rl_update_rates(rl, free_blocks);
-}
-
-int pblk_rl_high_thrs(struct pblk_rl *rl)
-{
- return rl->high;
-}
-
-int pblk_rl_max_io(struct pblk_rl *rl)
-{
- return rl->rb_max_io;
-}
-
-static void pblk_rl_u_timer(struct timer_list *t)
-{
- struct pblk_rl *rl = from_timer(rl, t, u_timer);
-
- /* Release user I/O state. Protect from GC */
- smp_store_release(&rl->rb_user_active, 0);
-}
-
-void pblk_rl_free(struct pblk_rl *rl)
-{
- del_timer(&rl->u_timer);
-}
-
-void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
-{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- int sec_meta, blk_meta;
- unsigned int rb_windows;
-
- /* Consider sectors used for metadata */
- sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
- blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
-
- rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
- rl->high_pw = get_count_order(rl->high);
-
- rl->rsv_blocks = pblk_get_min_chks(pblk);
-
- /* This will always be a power-of-2 */
- rb_windows = budget / NVM_MAX_VLBA;
- rl->rb_windows_pw = get_count_order(rb_windows);
-
- /* To start with, all buffer is available to user I/O writers */
- rl->rb_budget = budget;
- rl->rb_user_max = budget;
- rl->rb_gc_max = 0;
- rl->rb_state = PBLK_RL_HIGH;
-
- /* Maximize I/O size and ansure that back threshold is respected */
- if (threshold)
- rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
- else
- rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
-
- atomic_set(&rl->rb_user_cnt, 0);
- atomic_set(&rl->rb_gc_cnt, 0);
- atomic_set(&rl->rb_space, -1);
- atomic_set(&rl->werr_lines, 0);
-
- timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
-
- rl->rb_user_active = 0;
- rl->rb_gc_active = 0;
-}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
deleted file mode 100644
index 6387302b03f2..000000000000
--- a/drivers/lightnvm/pblk-sysfs.c
+++ /dev/null
@@ -1,728 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a physical block-device target for Open-channel SSDs.
- *
- * pblk-sysfs.c - pblk's sysfs
- *
- */
-
-#include "pblk.h"
-
-static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_lun *rlun;
- ssize_t sz = 0;
- int i;
-
- for (i = 0; i < geo->all_luns; i++) {
- int active = 1;
-
- rlun = &pblk->luns[i];
- if (!down_trylock(&rlun->wr_sem)) {
- active = 0;
- up(&rlun->wr_sem);
- }
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "pblk: pos:%d, ch:%d, lun:%d - %d\n",
- i,
- rlun->bppa.a.ch,
- rlun->bppa.a.lun,
- active);
- }
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
-{
- int free_blocks, free_user_blocks, total_blocks;
- int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
-
- free_blocks = pblk_rl_nr_free_blks(&pblk->rl);
- free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl);
- rb_user_max = pblk->rl.rb_user_max;
- rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
- rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
- rb_budget = pblk->rl.rb_budget;
- rb_state = pblk->rl.rb_state;
-
- total_blocks = pblk->rl.total_blocks;
-
- return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n",
- rb_user_cnt,
- rb_user_max,
- rb_gc_cnt,
- rb_gc_max,
- rb_state,
- rb_budget,
- pblk->rl.high,
- free_blocks,
- free_user_blocks,
- total_blocks,
- READ_ONCE(pblk->rl.rb_user_active));
-}
-
-static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
-{
- int gc_enabled, gc_active;
-
- pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active);
- return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
- gc_enabled, gc_active);
-}
-
-static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
-{
- ssize_t sz;
-
- sz = snprintf(page, PAGE_SIZE,
- "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
- atomic_long_read(&pblk->read_failed),
- atomic_long_read(&pblk->read_high_ecc),
- atomic_long_read(&pblk->read_empty),
- atomic_long_read(&pblk->read_failed_gc),
- atomic_long_read(&pblk->write_failed),
- atomic_long_read(&pblk->erase_failed));
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
-{
- return pblk_rb_sysfs(&pblk->rwb, page);
-}
-
-static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- ssize_t sz = 0;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
- struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
-
- sz = scnprintf(page, PAGE_SIZE,
- "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
- pblk->addrf_len,
- ppaf->blk_offset, ppaf->blk_len,
- ppaf->pg_offset, ppaf->pg_len,
- ppaf->lun_offset, ppaf->lun_len,
- ppaf->ch_offset, ppaf->ch_len,
- ppaf->pln_offset, ppaf->pln_len,
- ppaf->sec_offset, ppaf->sec_len);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
- gppaf->blk_offset, gppaf->blk_len,
- gppaf->pg_offset, gppaf->pg_len,
- gppaf->lun_offset, gppaf->lun_len,
- gppaf->ch_offset, gppaf->ch_len,
- gppaf->pln_offset, gppaf->pln_len,
- gppaf->sec_offset, gppaf->sec_len);
- } else {
- struct nvm_addrf *ppaf = &pblk->addrf;
- struct nvm_addrf *gppaf = &geo->addrf;
-
- sz = scnprintf(page, PAGE_SIZE,
- "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
- pblk->addrf_len,
- ppaf->ch_offset, ppaf->ch_len,
- ppaf->lun_offset, ppaf->lun_len,
- ppaf->chk_offset, ppaf->chk_len,
- ppaf->sec_offset, ppaf->sec_len);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
- gppaf->ch_offset, gppaf->ch_len,
- gppaf->lun_offset, gppaf->lun_len,
- gppaf->chk_offset, gppaf->chk_len,
- gppaf->sec_offset, gppaf->sec_len);
- }
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *line;
- ssize_t sz = 0;
- int nr_free_lines;
- int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
- int d_line_cnt = 0, l_line_cnt = 0;
- int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int gc_werr = 0;
-
- int bad = 0, cor = 0;
- int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
- int map_weight = 0, meta_weight = 0;
-
- spin_lock(&l_mg->free_lock);
- cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
- cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
- nr_free_lines = l_mg->nr_free_lines;
-
- list_for_each_entry(line, &l_mg->free_list, list)
- free_line_cnt++;
- spin_unlock(&l_mg->free_lock);
-
- spin_lock(&l_mg->close_lock);
- list_for_each_entry(line, &l_mg->emeta_list, list)
- emeta_line_cnt++;
- spin_unlock(&l_mg->close_lock);
-
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry(line, &l_mg->gc_full_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_full++;
- }
-
- list_for_each_entry(line, &l_mg->gc_high_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_high++;
- }
-
- list_for_each_entry(line, &l_mg->gc_mid_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_mid++;
- }
-
- list_for_each_entry(line, &l_mg->gc_low_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_low++;
- }
-
- list_for_each_entry(line, &l_mg->gc_empty_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_empty++;
- }
-
- list_for_each_entry(line, &l_mg->gc_werr_list, list) {
- if (line->type == PBLK_LINETYPE_DATA)
- d_line_cnt++;
- else if (line->type == PBLK_LINETYPE_LOG)
- l_line_cnt++;
- closed_line_cnt++;
- gc_werr++;
- }
-
- list_for_each_entry(line, &l_mg->bad_list, list)
- bad++;
- list_for_each_entry(line, &l_mg->corrupt_list, list)
- cor++;
- spin_unlock(&l_mg->gc_lock);
-
- spin_lock(&l_mg->free_lock);
- if (l_mg->data_line) {
- cur_sec = l_mg->data_line->cur_sec;
- msecs = l_mg->data_line->left_msecs;
- vsc = le32_to_cpu(*l_mg->data_line->vsc);
- sec_in_line = l_mg->data_line->sec_in_line;
- meta_weight = bitmap_weight(&l_mg->meta_bitmap,
- PBLK_DATA_LINES);
-
- spin_lock(&l_mg->data_line->lock);
- if (l_mg->data_line->map_bitmap)
- map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
- lm->sec_per_line);
- else
- map_weight = 0;
- spin_unlock(&l_mg->data_line->lock);
- }
- spin_unlock(&l_mg->free_lock);
-
- if (nr_free_lines != free_line_cnt)
- pblk_err(pblk, "corrupted free line list:%d/%d\n",
- nr_free_lines, free_line_cnt);
-
- sz = scnprintf(page, PAGE_SIZE - sz,
- "line: nluns:%d, nblks:%d, nsecs:%d\n",
- geo->all_luns, lm->blk_per_line, lm->sec_per_line);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
- cur_data, cur_log,
- nr_free_lines,
- emeta_line_cnt, meta_weight,
- closed_line_cnt,
- bad, cor,
- d_line_cnt, l_line_cnt,
- l_mg->nr_lines);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
- gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
- atomic_read(&pblk->gc.read_inflight_gc));
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line,
- atomic_read(&pblk->inflight_io));
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_meta *lm = &pblk->lm;
- ssize_t sz = 0;
-
- sz = scnprintf(page, PAGE_SIZE - sz,
- "smeta - len:%d, secs:%d\n",
- lm->smeta_len, lm->smeta_sec);
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len[0], lm->emeta_sec[0],
- lm->emeta_bb);
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
- lm->sec_bitmap_len,
- lm->blk_bitmap_len,
- lm->lun_bitmap_len);
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "blk_line:%d, sec_line:%d, sec_blk:%d\n",
- lm->blk_per_line,
- lm->sec_per_line,
- geo->clba);
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
-{
- return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
-}
-
-static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
- char *page)
-{
- int sz;
-
- sz = scnprintf(page, PAGE_SIZE,
- "user:%lld gc:%lld pad:%lld WA:",
- user, gc, pad);
-
- if (!user) {
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
- } else {
- u64 wa_int;
- u32 wa_frac;
-
- wa_int = (user + gc + pad) * 100000;
- wa_int = div64_u64(wa_int, user);
- wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
- wa_int, wa_frac);
- }
-
- return sz;
-}
-
-static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
-{
- return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
- atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
- page);
-}
-
-static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
-{
- return pblk_get_write_amp(
- atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
- atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
- atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
-}
-
-static long long bucket_percentage(unsigned long long bucket,
- unsigned long long total)
-{
- int p = bucket * 100;
-
- p = div_u64(p, total);
-
- return p;
-}
-
-static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
-{
- int sz = 0;
- unsigned long long total;
- unsigned long long total_buckets = 0;
- int buckets = pblk->min_write_pgs - 1;
- int i;
-
- total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
- if (!total) {
- for (i = 0; i < (buckets + 1); i++)
- sz += scnprintf(page + sz, PAGE_SIZE - sz,
- "%d:0 ", i);
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
-
- return sz;
- }
-
- for (i = 0; i < buckets; i++)
- total_buckets += atomic64_read(&pblk->pad_dist[i]);
-
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
- bucket_percentage(total - total_buckets, total));
-
- for (i = 0; i < buckets; i++) {
- unsigned long long p;
-
- p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
- total);
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
- i + 1, p);
- }
- sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
-
- return sz;
-}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
-{
- return snprintf(page, PAGE_SIZE,
- "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
- atomic_long_read(&pblk->inflight_writes),
- atomic_long_read(&pblk->inflight_reads),
- atomic_long_read(&pblk->req_writes),
- (u64)atomic64_read(&pblk->nr_flush),
- atomic_long_read(&pblk->padded_writes),
- atomic_long_read(&pblk->padded_wb),
- atomic_long_read(&pblk->sub_writes),
- atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->recov_writes),
- atomic_long_read(&pblk->recov_gc_writes),
- atomic_long_read(&pblk->recov_gc_reads),
- atomic_long_read(&pblk->cache_reads),
- atomic_long_read(&pblk->sync_reads));
-}
-#endif
-
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
-{
- size_t c_len;
- int force;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &force))
- return -EINVAL;
-
- pblk_gc_sysfs_force(pblk, force);
-
- return len;
-}
-
-static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
- const char *page, size_t len)
-{
- size_t c_len;
- int sec_per_write;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &sec_per_write))
- return -EINVAL;
-
- if (!pblk_is_oob_meta_supported(pblk)) {
- /* For packed metadata case it is
- * not allowed to change sec_per_write.
- */
- return -EINVAL;
- }
-
- if (sec_per_write < pblk->min_write_pgs
- || sec_per_write > pblk->max_write_pgs
- || sec_per_write % pblk->min_write_pgs != 0)
- return -EINVAL;
-
- pblk_set_sec_per_write(pblk, sec_per_write);
-
- return len;
-}
-
-static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
- const char *page, size_t len)
-{
- size_t c_len;
- int reset_value;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &reset_value))
- return -EINVAL;
-
- if (reset_value != 0)
- return -EINVAL;
-
- pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
- pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
- pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
-
- return len;
-}
-
-
-static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
- const char *page, size_t len)
-{
- size_t c_len;
- int reset_value;
- int buckets = pblk->min_write_pgs - 1;
- int i;
-
- c_len = strcspn(page, "\n");
- if (c_len >= len)
- return -EINVAL;
-
- if (kstrtouint(page, 0, &reset_value))
- return -EINVAL;
-
- if (reset_value != 0)
- return -EINVAL;
-
- for (i = 0; i < buckets; i++)
- atomic64_set(&pblk->pad_dist[i], 0);
-
- pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
-
- return len;
-}
-
-static struct attribute sys_write_luns = {
- .name = "write_luns",
- .mode = 0444,
-};
-
-static struct attribute sys_rate_limiter_attr = {
- .name = "rate_limiter",
- .mode = 0444,
-};
-
-static struct attribute sys_gc_state = {
- .name = "gc_state",
- .mode = 0444,
-};
-
-static struct attribute sys_errors_attr = {
- .name = "errors",
- .mode = 0444,
-};
-
-static struct attribute sys_rb_attr = {
- .name = "write_buffer",
- .mode = 0444,
-};
-
-static struct attribute sys_stats_ppaf_attr = {
- .name = "ppa_format",
- .mode = 0444,
-};
-
-static struct attribute sys_lines_attr = {
- .name = "lines",
- .mode = 0444,
-};
-
-static struct attribute sys_lines_info_attr = {
- .name = "lines_info",
- .mode = 0444,
-};
-
-static struct attribute sys_gc_force = {
- .name = "gc_force",
- .mode = 0200,
-};
-
-static struct attribute sys_max_sec_per_write = {
- .name = "max_sec_per_write",
- .mode = 0644,
-};
-
-static struct attribute sys_write_amp_mileage = {
- .name = "write_amp_mileage",
- .mode = 0444,
-};
-
-static struct attribute sys_write_amp_trip = {
- .name = "write_amp_trip",
- .mode = 0644,
-};
-
-static struct attribute sys_padding_dist = {
- .name = "padding_dist",
- .mode = 0644,
-};
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static struct attribute sys_stats_debug_attr = {
- .name = "stats",
- .mode = 0444,
-};
-#endif
-
-static struct attribute *pblk_attrs[] = {
- &sys_write_luns,
- &sys_rate_limiter_attr,
- &sys_errors_attr,
- &sys_gc_state,
- &sys_gc_force,
- &sys_max_sec_per_write,
- &sys_rb_attr,
- &sys_stats_ppaf_attr,
- &sys_lines_attr,
- &sys_lines_info_attr,
- &sys_write_amp_mileage,
- &sys_write_amp_trip,
- &sys_padding_dist,
-#ifdef CONFIG_NVM_PBLK_DEBUG
- &sys_stats_debug_attr,
-#endif
- NULL,
-};
-
-static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct pblk *pblk = container_of(kobj, struct pblk, kobj);
-
- if (strcmp(attr->name, "rate_limiter") == 0)
- return pblk_sysfs_rate_limiter(pblk, buf);
- else if (strcmp(attr->name, "write_luns") == 0)
- return pblk_sysfs_luns_show(pblk, buf);
- else if (strcmp(attr->name, "gc_state") == 0)
- return pblk_sysfs_gc_state_show(pblk, buf);
- else if (strcmp(attr->name, "errors") == 0)
- return pblk_sysfs_stats(pblk, buf);
- else if (strcmp(attr->name, "write_buffer") == 0)
- return pblk_sysfs_write_buffer(pblk, buf);
- else if (strcmp(attr->name, "ppa_format") == 0)
- return pblk_sysfs_ppaf(pblk, buf);
- else if (strcmp(attr->name, "lines") == 0)
- return pblk_sysfs_lines(pblk, buf);
- else if (strcmp(attr->name, "lines_info") == 0)
- return pblk_sysfs_lines_info(pblk, buf);
- else if (strcmp(attr->name, "max_sec_per_write") == 0)
- return pblk_sysfs_get_sec_per_write(pblk, buf);
- else if (strcmp(attr->name, "write_amp_mileage") == 0)
- return pblk_sysfs_get_write_amp_mileage(pblk, buf);
- else if (strcmp(attr->name, "write_amp_trip") == 0)
- return pblk_sysfs_get_write_amp_trip(pblk, buf);
- else if (strcmp(attr->name, "padding_dist") == 0)
- return pblk_sysfs_get_padding_dist(pblk, buf);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- else if (strcmp(attr->name, "stats") == 0)
- return pblk_sysfs_stats_debug(pblk, buf);
-#endif
- return 0;
-}
-
-static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
-{
- struct pblk *pblk = container_of(kobj, struct pblk, kobj);
-
- if (strcmp(attr->name, "gc_force") == 0)
- return pblk_sysfs_gc_force(pblk, buf, len);
- else if (strcmp(attr->name, "max_sec_per_write") == 0)
- return pblk_sysfs_set_sec_per_write(pblk, buf, len);
- else if (strcmp(attr->name, "write_amp_trip") == 0)
- return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
- else if (strcmp(attr->name, "padding_dist") == 0)
- return pblk_sysfs_set_padding_dist(pblk, buf, len);
- return 0;
-}
-
-static const struct sysfs_ops pblk_sysfs_ops = {
- .show = pblk_sysfs_show,
- .store = pblk_sysfs_store,
-};
-
-static struct kobj_type pblk_ktype = {
- .sysfs_ops = &pblk_sysfs_ops,
- .default_attrs = pblk_attrs,
-};
-
-int pblk_sysfs_init(struct gendisk *tdisk)
-{
- struct pblk *pblk = tdisk->private_data;
- struct device *parent_dev = disk_to_dev(pblk->disk);
- int ret;
-
- ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
- kobject_get(&parent_dev->kobj),
- "%s", "pblk");
- if (ret) {
- pblk_err(pblk, "could not register\n");
- return ret;
- }
-
- kobject_uevent(&pblk->kobj, KOBJ_ADD);
- return 0;
-}
-
-void pblk_sysfs_exit(struct gendisk *tdisk)
-{
- struct pblk *pblk = tdisk->private_data;
-
- kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
- kobject_del(&pblk->kobj);
- kobject_put(&pblk->kobj);
-}
diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h
deleted file mode 100644
index 47b67c6bff7a..000000000000
--- a/drivers/lightnvm/pblk-trace.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM pblk
-
-#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_PBLK_H
-
-#include <linux/tracepoint.h>
-
-struct ppa_addr;
-
-#define show_chunk_flags(state) __print_flags(state, "", \
- { NVM_CHK_ST_FREE, "FREE", }, \
- { NVM_CHK_ST_CLOSED, "CLOSED", }, \
- { NVM_CHK_ST_OPEN, "OPEN", }, \
- { NVM_CHK_ST_OFFLINE, "OFFLINE", })
-
-#define show_line_state(state) __print_symbolic(state, \
- { PBLK_LINESTATE_NEW, "NEW", }, \
- { PBLK_LINESTATE_FREE, "FREE", }, \
- { PBLK_LINESTATE_OPEN, "OPEN", }, \
- { PBLK_LINESTATE_CLOSED, "CLOSED", }, \
- { PBLK_LINESTATE_GC, "GC", }, \
- { PBLK_LINESTATE_BAD, "BAD", }, \
- { PBLK_LINESTATE_CORRUPT, "CORRUPT" })
-
-
-#define show_pblk_state(state) __print_symbolic(state, \
- { PBLK_STATE_RUNNING, "RUNNING", }, \
- { PBLK_STATE_STOPPING, "STOPPING", }, \
- { PBLK_STATE_RECOVERING, "RECOVERING", }, \
- { PBLK_STATE_STOPPED, "STOPPED" })
-
-#define show_chunk_erase_state(state) __print_symbolic(state, \
- { PBLK_CHUNK_RESET_START, "START", }, \
- { PBLK_CHUNK_RESET_DONE, "OK", }, \
- { PBLK_CHUNK_RESET_FAILED, "FAILED" })
-
-
-TRACE_EVENT(pblk_chunk_reset,
-
- TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
-
- TP_ARGS(name, ppa, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(u64, ppa)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->ppa = ppa->ppa;
- __entry->state = state;
- ),
-
- TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
- show_chunk_erase_state((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_chunk_state,
-
- TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
-
- TP_ARGS(name, ppa, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(u64, ppa)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->ppa = ppa->ppa;
- __entry->state = state;
- ),
-
- TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
- (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
- show_chunk_flags((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_line_state,
-
- TP_PROTO(const char *name, int line, int state),
-
- TP_ARGS(name, line, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(int, line)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->line = line;
- __entry->state = state;
- ),
-
- TP_printk("dev=%s line=%d state=%s", __get_str(name),
- (int)__entry->line,
- show_line_state((int)__entry->state))
-
-);
-
-TRACE_EVENT(pblk_state,
-
- TP_PROTO(const char *name, int state),
-
- TP_ARGS(name, state),
-
- TP_STRUCT__entry(
- __string(name, name)
- __field(int, state)
- ),
-
- TP_fast_assign(
- __assign_str(name, name);
- __entry->state = state;
- ),
-
- TP_printk("dev=%s state=%s", __get_str(name),
- show_pblk_state((int)__entry->state))
-
-);
-
-#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
-
-/* This part must be outside protection */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE pblk-trace
-#include <trace/define_trace.h>
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
deleted file mode 100644
index b9a2aeba95ab..000000000000
--- a/drivers/lightnvm/pblk-write.c
+++ /dev/null
@@ -1,665 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Javier Gonzalez <javier@cnexlabs.com>
- * Matias Bjorling <matias@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * pblk-write.c - pblk's write path from write buffer to media
- */
-
-#include "pblk.h"
-#include "pblk-trace.h"
-
-static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- struct bio *original_bio;
- struct pblk_rb *rwb = &pblk->rwb;
- unsigned long ret;
- int i;
-
- for (i = 0; i < c_ctx->nr_valid; i++) {
- struct pblk_w_ctx *w_ctx;
- int pos = c_ctx->sentry + i;
- int flags;
-
- w_ctx = pblk_rb_w_ctx(rwb, pos);
- flags = READ_ONCE(w_ctx->flags);
-
- if (flags & PBLK_FLUSH_ENTRY) {
- flags &= ~PBLK_FLUSH_ENTRY;
- /* Release flags on context. Protect from writes */
- smp_store_release(&w_ctx->flags, flags);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_dec(&rwb->inflight_flush_point);
-#endif
- }
-
- while ((original_bio = bio_list_pop(&w_ctx->bios)))
- bio_endio(original_bio);
- }
-
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
- c_ctx->nr_padded);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(rqd->nr_ppas, &pblk->sync_writes);
-#endif
-
- ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
-
- bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-
- return ret;
-}
-
-static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
- struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- list_del(&c_ctx->list);
- return pblk_end_w_bio(pblk, rqd, c_ctx);
-}
-
-static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
-{
- struct pblk_c_ctx *c, *r;
- unsigned long flags;
- unsigned long pos;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
-#endif
- pblk_up_rq(pblk, c_ctx->lun_bitmap);
-
- pos = pblk_rb_sync_init(&pblk->rwb, &flags);
- if (pos == c_ctx->sentry) {
- pos = pblk_end_w_bio(pblk, rqd, c_ctx);
-
-retry:
- list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
- rqd = nvm_rq_from_c_ctx(c);
- if (c->sentry == pos) {
- pos = pblk_end_queued_w_bio(pblk, rqd, c);
- goto retry;
- }
- }
- } else {
- WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
- list_add_tail(&c_ctx->list, &pblk->compl_list);
- }
- pblk_rb_sync_end(&pblk->rwb, &flags);
-}
-
-/* Map remaining sectors in chunk, starting from ppa */
-static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa,
- int rqd_ppas)
-{
- struct pblk_line *line;
- struct ppa_addr map_ppa = *ppa;
- __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
- __le64 *lba_list;
- u64 paddr;
- int done = 0;
- int n = 0;
-
- line = pblk_ppa_to_line(pblk, *ppa);
- lba_list = emeta_to_lbas(pblk, line->emeta->buf);
-
- spin_lock(&line->lock);
-
- while (!done) {
- paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
-
- if (!test_and_set_bit(paddr, line->map_bitmap))
- line->left_msecs--;
-
- if (n < rqd_ppas && lba_list[paddr] != addr_empty)
- line->nr_valid_lbas--;
-
- lba_list[paddr] = addr_empty;
-
- if (!test_and_set_bit(paddr, line->invalid_bitmap))
- le32_add_cpu(line->vsc, -1);
-
- done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa);
-
- n++;
- }
-
- line->w_err_gc->has_write_err = 1;
- spin_unlock(&line->lock);
-}
-
-static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
- unsigned int nr_entries)
-{
- struct pblk_rb *rb = &pblk->rwb;
- struct pblk_rb_entry *entry;
- struct pblk_line *line;
- struct pblk_w_ctx *w_ctx;
- struct ppa_addr ppa_l2p;
- int flags;
- unsigned int i;
-
- spin_lock(&pblk->trans_lock);
- for (i = 0; i < nr_entries; i++) {
- entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)];
- w_ctx = &entry->w_ctx;
-
- /* Check if the lba has been overwritten */
- if (w_ctx->lba != ADDR_EMPTY) {
- ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
- if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
- w_ctx->lba = ADDR_EMPTY;
- }
-
- /* Mark up the entry as submittable again */
- flags = READ_ONCE(w_ctx->flags);
- flags |= PBLK_WRITTEN_DATA;
- /* Release flags on write context. Protect from writes */
- smp_store_release(&w_ctx->flags, flags);
-
- /* Decrease the reference count to the line as we will
- * re-map these entries
- */
- line = pblk_ppa_to_line(pblk, w_ctx->ppa);
- atomic_dec(&line->sec_to_update);
- kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&pblk->trans_lock);
-}
-
-static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
-{
- struct pblk_c_ctx *r_ctx;
-
- r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
- if (!r_ctx)
- return;
-
- r_ctx->lun_bitmap = NULL;
- r_ctx->sentry = c_ctx->sentry;
- r_ctx->nr_valid = c_ctx->nr_valid;
- r_ctx->nr_padded = c_ctx->nr_padded;
-
- spin_lock(&pblk->resubmit_lock);
- list_add_tail(&r_ctx->list, &pblk->resubmit_list);
- spin_unlock(&pblk->resubmit_lock);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
-#endif
-}
-
-static void pblk_submit_rec(struct work_struct *work)
-{
- struct pblk_rec_ctx *recovery =
- container_of(work, struct pblk_rec_ctx, ws_rec);
- struct pblk *pblk = recovery->pblk;
- struct nvm_rq *rqd = recovery->rqd;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- pblk_log_write_err(pblk, rqd);
-
- pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas);
- pblk_queue_resubmit(pblk, c_ctx);
-
- pblk_up_rq(pblk, c_ctx->lun_bitmap);
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
- c_ctx->nr_padded);
- bio_put(rqd->bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
- mempool_free(recovery, &pblk->rec_pool);
-
- atomic_dec(&pblk->inflight_io);
- pblk_write_kick(pblk);
-}
-
-
-static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct pblk_rec_ctx *recovery;
-
- recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
- if (!recovery) {
- pblk_err(pblk, "could not allocate recovery work\n");
- return;
- }
-
- recovery->pblk = pblk;
- recovery->rqd = rqd;
-
- INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->close_wq, &recovery->ws_rec);
-}
-
-static void pblk_end_io_write(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
-
- if (rqd->error) {
- pblk_end_w_fail(pblk, rqd);
- return;
- } else {
- if (trace_pblk_chunk_state_enabled())
- pblk_check_chunk_state_update(pblk, rqd);
-#ifdef CONFIG_NVM_PBLK_DEBUG
- WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
-#endif
- }
-
- pblk_complete_write(pblk, rqd, c_ctx);
- atomic_dec(&pblk->inflight_io);
-}
-
-static void pblk_end_io_write_meta(struct nvm_rq *rqd)
-{
- struct pblk *pblk = rqd->private;
- struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
- struct pblk_line *line = m_ctx->private;
- struct pblk_emeta *emeta = line->emeta;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
- int sync;
-
- pblk_up_chunk(pblk, ppa_list[0]);
-
- if (rqd->error) {
- pblk_log_write_err(pblk, rqd);
- pblk_err(pblk, "metadata I/O failed. Line %d\n", line->id);
- line->w_err_gc->has_write_err = 1;
- } else {
- if (trace_pblk_chunk_state_enabled())
- pblk_check_chunk_state_update(pblk, rqd);
- }
-
- sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
- if (sync == emeta->nr_entries)
- pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws,
- GFP_ATOMIC, pblk->close_wq);
-
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-
- atomic_dec(&pblk->inflight_io);
-}
-
-static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs, nvm_end_io_fn(*end_io))
-{
- /* Setup write request */
- rqd->opcode = NVM_OP_PWRITE;
- rqd->nr_ppas = nr_secs;
- rqd->is_seq = 1;
- rqd->private = pblk;
- rqd->end_io = end_io;
-
- return pblk_alloc_rqd_meta(pblk, rqd);
-}
-
-static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct ppa_addr *erase_ppa)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_erase(pblk);
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- unsigned int valid = c_ctx->nr_valid;
- unsigned int padded = c_ctx->nr_padded;
- unsigned int nr_secs = valid + padded;
- unsigned long *lun_bitmap;
- int ret;
-
- lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap)
- return -ENOMEM;
- c_ctx->lun_bitmap = lun_bitmap;
-
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
- if (ret) {
- kfree(lun_bitmap);
- return ret;
- }
-
- if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
- ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, 0);
- else
- ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, erase_ppa);
-
- return ret;
-}
-
-static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
- unsigned int secs_to_flush)
-{
- int secs_to_sync;
-
- secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- if ((!secs_to_sync && secs_to_flush)
- || (secs_to_sync < 0)
- || (secs_to_sync > secs_avail && !secs_to_flush)) {
- pblk_err(pblk, "bad sector calculation (a:%d,s:%d,f:%d)\n",
- secs_avail, secs_to_sync, secs_to_flush);
- }
-#endif
-
- return secs_to_sync;
-}
-
-int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_emeta *emeta = meta_line->emeta;
- struct ppa_addr *ppa_list;
- struct pblk_g_ctx *m_ctx;
- struct nvm_rq *rqd;
- void *data;
- u64 paddr;
- int rq_ppas = pblk->min_write_pgs;
- int id = meta_line->id;
- int rq_len;
- int i, j;
- int ret;
-
- rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
-
- m_ctx = nvm_rq_to_pdu(rqd);
- m_ctx->private = meta_line;
-
- rq_len = rq_ppas * geo->csecs;
- data = ((void *)emeta->buf) + emeta->mem;
-
- ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
- if (ret)
- goto fail_free_rqd;
-
- ppa_list = nvm_rq_to_ppa_list(rqd);
- for (i = 0; i < rqd->nr_ppas; ) {
- spin_lock(&meta_line->lock);
- paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
- spin_unlock(&meta_line->lock);
- for (j = 0; j < rq_ppas; j++, i++, paddr++)
- ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
- }
-
- spin_lock(&l_mg->close_lock);
- emeta->mem += rq_len;
- if (emeta->mem >= lm->emeta_len[0])
- list_del(&meta_line->list);
- spin_unlock(&l_mg->close_lock);
-
- pblk_down_chunk(pblk, ppa_list[0]);
-
- ret = pblk_submit_io(pblk, rqd, data);
- if (ret) {
- pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
- goto fail_rollback;
- }
-
- return NVM_IO_OK;
-
-fail_rollback:
- pblk_up_chunk(pblk, ppa_list[0]);
- spin_lock(&l_mg->close_lock);
- pblk_dealloc_page(pblk, meta_line, rq_ppas);
- list_add(&meta_line->list, &meta_line->list);
- spin_unlock(&l_mg->close_lock);
-fail_free_rqd:
- pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
- return ret;
-}
-
-static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
- struct pblk_line *meta_line,
- struct nvm_rq *data_rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
- struct pblk_line *data_line = pblk_line_get_data(pblk);
- struct ppa_addr ppa, ppa_opt;
- u64 paddr;
- int pos_opt;
-
- /* Schedule a metadata I/O that is half the distance from the data I/O
- * with regards to the number of LUNs forming the pblk instance. This
- * balances LUN conflicts across every I/O.
- *
- * When the LUN configuration changes (e.g., due to GC), this distance
- * can align, which would result on metadata and data I/Os colliding. In
- * this case, modify the distance to not be optimal, but move the
- * optimal in the right direction.
- */
- paddr = pblk_lookup_page(pblk, meta_line);
- ppa = addr_to_gen_ppa(pblk, paddr, 0);
- ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
- pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
-
- if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
- test_bit(pos_opt, data_line->blk_bitmap))
- return true;
-
- if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
- data_line->meta_distance--;
-
- return false;
-}
-
-static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
- struct nvm_rq *data_rqd)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line *meta_line;
-
- spin_lock(&l_mg->close_lock);
- if (list_empty(&l_mg->emeta_list)) {
- spin_unlock(&l_mg->close_lock);
- return NULL;
- }
- meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
- if (meta_line->emeta->mem >= lm->emeta_len[0]) {
- spin_unlock(&l_mg->close_lock);
- return NULL;
- }
- spin_unlock(&l_mg->close_lock);
-
- if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
- return NULL;
-
- return meta_line;
-}
-
-static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct ppa_addr erase_ppa;
- struct pblk_line *meta_line;
- int err;
-
- pblk_ppa_set_empty(&erase_ppa);
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
- if (err) {
- pblk_err(pblk, "could not setup write request: %d\n", err);
- return NVM_IO_ERR;
- }
-
- meta_line = pblk_should_submit_meta_io(pblk, rqd);
-
- /* Submit data write for current data line */
- err = pblk_submit_io(pblk, rqd, NULL);
- if (err) {
- pblk_err(pblk, "data I/O submission failed: %d\n", err);
- return NVM_IO_ERR;
- }
-
- if (!pblk_ppa_empty(erase_ppa)) {
- /* Submit erase for next data line */
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct pblk_line *e_line = pblk_line_get_erase(pblk);
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = pblk_ppa_to_pos(geo, erase_ppa);
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- }
- }
-
- if (meta_line) {
- /* Submit metadata write for previous data line */
- err = pblk_submit_meta_io(pblk, meta_line);
- if (err) {
- pblk_err(pblk, "metadata I/O submission failed: %d",
- err);
- return NVM_IO_ERR;
- }
- }
-
- return NVM_IO_OK;
-}
-
-static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
- struct bio *bio = rqd->bio;
-
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid,
- c_ctx->nr_padded);
-}
-
-static int pblk_submit_write(struct pblk *pblk, int *secs_left)
-{
- struct bio *bio;
- struct nvm_rq *rqd;
- unsigned int secs_avail, secs_to_sync, secs_to_com;
- unsigned int secs_to_flush, packed_meta_pgs;
- unsigned long pos;
- unsigned int resubmit;
-
- *secs_left = 0;
-
- spin_lock(&pblk->resubmit_lock);
- resubmit = !list_empty(&pblk->resubmit_list);
- spin_unlock(&pblk->resubmit_lock);
-
- /* Resubmit failed writes first */
- if (resubmit) {
- struct pblk_c_ctx *r_ctx;
-
- spin_lock(&pblk->resubmit_lock);
- r_ctx = list_first_entry(&pblk->resubmit_list,
- struct pblk_c_ctx, list);
- list_del(&r_ctx->list);
- spin_unlock(&pblk->resubmit_lock);
-
- secs_avail = r_ctx->nr_valid;
- pos = r_ctx->sentry;
-
- pblk_prepare_resubmit(pblk, pos, secs_avail);
- secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
- secs_avail);
-
- kfree(r_ctx);
- } else {
- /* If there are no sectors in the cache,
- * flushes (bios without data) will be cleared on
- * the cache threads
- */
- secs_avail = pblk_rb_read_count(&pblk->rwb);
- if (!secs_avail)
- return 0;
-
- secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
- if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
- return 0;
-
- secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
- secs_to_flush);
- if (secs_to_sync > pblk->max_write_pgs) {
- pblk_err(pblk, "bad buffer sync calculation\n");
- return 0;
- }
-
- secs_to_com = (secs_to_sync > secs_avail) ?
- secs_avail : secs_to_sync;
- pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- }
-
- packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
- bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
- rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
- rqd->bio = bio;
-
- if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync,
- secs_avail)) {
- pblk_err(pblk, "corrupted write bio\n");
- goto fail_put_bio;
- }
-
- if (pblk_submit_io_set(pblk, rqd))
- goto fail_free_bio;
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_long_add(secs_to_sync, &pblk->sub_writes);
-#endif
-
- *secs_left = 1;
- return 0;
-
-fail_free_bio:
- pblk_free_write_rqd(pblk, rqd);
-fail_put_bio:
- bio_put(bio);
- pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-
- return -EINTR;
-}
-
-int pblk_write_ts(void *data)
-{
- struct pblk *pblk = data;
- int secs_left;
- int write_failure = 0;
-
- while (!kthread_should_stop()) {
- if (!write_failure) {
- write_failure = pblk_submit_write(pblk, &secs_left);
-
- if (secs_left)
- continue;
- }
- set_current_state(TASK_INTERRUPTIBLE);
- io_schedule();
- }
-
- return 0;
-}
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
deleted file mode 100644
index 86ffa875bfe1..000000000000
--- a/drivers/lightnvm/pblk.h
+++ /dev/null
@@ -1,1358 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2015 IT University of Copenhagen (rrpc.h)
- * Copyright (C) 2016 CNEX Labs
- * Initial release: Matias Bjorling <matias@cnexlabs.com>
- * Write buffering: Javier Gonzalez <javier@cnexlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Implementation of a Physical Block-device target for Open-channel SSDs.
- *
- */
-
-#ifndef PBLK_H_
-#define PBLK_H_
-
-#include <linux/blkdev.h>
-#include <linux/blk-mq.h>
-#include <linux/bio.h>
-#include <linux/module.h>
-#include <linux/kthread.h>
-#include <linux/vmalloc.h>
-#include <linux/crc32.h>
-#include <linux/uuid.h>
-
-#include <linux/lightnvm.h>
-
-/* Run only GC if less than 1/X blocks are free */
-#define GC_LIMIT_INVERSE 5
-#define GC_TIME_MSECS 1000
-
-#define PBLK_SECTOR (512)
-#define PBLK_EXPOSED_PAGE_SIZE (4096)
-
-#define PBLK_NR_CLOSE_JOBS (4)
-
-#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
-
-/* Max 512 LUNs per device */
-#define PBLK_MAX_LUNS_BITMAP (4)
-
-#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR)
-
-/* Static pool sizes */
-#define PBLK_GEN_WS_POOL_SIZE (2)
-
-#define PBLK_DEFAULT_OP (11)
-
-enum {
- PBLK_READ = READ,
- PBLK_WRITE = WRITE,/* Write from write buffer */
- PBLK_WRITE_INT, /* Internal write - no write buffer */
- PBLK_READ_RECOV, /* Recovery read - errors allowed */
- PBLK_ERASE,
-};
-
-enum {
- /* IO Types */
- PBLK_IOTYPE_USER = 1 << 0,
- PBLK_IOTYPE_GC = 1 << 1,
-
- /* Write buffer flags */
- PBLK_FLUSH_ENTRY = 1 << 2,
- PBLK_WRITTEN_DATA = 1 << 3,
- PBLK_SUBMITTED_ENTRY = 1 << 4,
- PBLK_WRITABLE_ENTRY = 1 << 5,
-};
-
-enum {
- PBLK_BLK_ST_OPEN = 0x1,
- PBLK_BLK_ST_CLOSED = 0x2,
-};
-
-enum {
- PBLK_CHUNK_RESET_START,
- PBLK_CHUNK_RESET_DONE,
- PBLK_CHUNK_RESET_FAILED,
-};
-
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
-/* The number of GC lists and the rate-limiter states go together. This way the
- * rate-limiter can dictate how much GC is needed based on resource utilization.
- */
-#define PBLK_GC_NR_LISTS 4
-
-enum {
- PBLK_RL_OFF = 0,
- PBLK_RL_WERR = 1,
- PBLK_RL_HIGH = 2,
- PBLK_RL_MID = 3,
- PBLK_RL_LOW = 4
-};
-
-#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
-
-/* write buffer completion context */
-struct pblk_c_ctx {
- struct list_head list; /* Head for out-of-order completion */
-
- unsigned long *lun_bitmap; /* Luns used on current request */
- unsigned int sentry;
- unsigned int nr_valid;
- unsigned int nr_padded;
-};
-
-/* read context */
-struct pblk_g_ctx {
- void *private;
- unsigned long start_time;
- u64 lba;
-};
-
-/* Pad context */
-struct pblk_pad_rq {
- struct pblk *pblk;
- struct completion wait;
- struct kref ref;
-};
-
-/* Recovery context */
-struct pblk_rec_ctx {
- struct pblk *pblk;
- struct nvm_rq *rqd;
- struct work_struct ws_rec;
-};
-
-/* Write context */
-struct pblk_w_ctx {
- struct bio_list bios; /* Original bios - used for completion
- * in REQ_FUA, REQ_FLUSH case
- */
- u64 lba; /* Logic addr. associated with entry */
- struct ppa_addr ppa; /* Physic addr. associated with entry */
- int flags; /* Write context flags */
-};
-
-struct pblk_rb_entry {
- struct ppa_addr cacheline; /* Cacheline for this entry */
- void *data; /* Pointer to data on this entry */
- struct pblk_w_ctx w_ctx; /* Context for this entry */
- struct list_head index; /* List head to enable indexes */
-};
-
-#define EMPTY_ENTRY (~0U)
-
-struct pblk_rb_pages {
- struct page *pages;
- int order;
- struct list_head list;
-};
-
-struct pblk_rb {
- struct pblk_rb_entry *entries; /* Ring buffer entries */
- unsigned int mem; /* Write offset - points to next
- * writable entry in memory
- */
- unsigned int subm; /* Read offset - points to last entry
- * that has been submitted to the media
- * to be persisted
- */
- unsigned int sync; /* Synced - backpointer that signals
- * the last submitted entry that has
- * been successfully persisted to media
- */
- unsigned int flush_point; /* Sync point - last entry that must be
- * flushed to the media. Used with
- * REQ_FLUSH and REQ_FUA
- */
- unsigned int l2p_update; /* l2p update point - next entry for
- * which l2p mapping will be updated to
- * contain a device ppa address (instead
- * of a cacheline
- */
- unsigned int nr_entries; /* Number of entries in write buffer -
- * must be a power of two
- */
- unsigned int seg_size; /* Size of the data segments being
- * stored on each entry. Typically this
- * will be 4KB
- */
-
- unsigned int back_thres; /* Threshold that shall be maintained by
- * the backpointer in order to respect
- * geo->mw_cunits on a per chunk basis
- */
-
- struct list_head pages; /* List of data pages */
-
- spinlock_t w_lock; /* Write lock */
- spinlock_t s_lock; /* Sync lock */
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- atomic_t inflight_flush_point; /* Not served REQ_FLUSH | REQ_FUA */
-#endif
-};
-
-#define PBLK_RECOVERY_SECTORS 16
-
-struct pblk_lun {
- struct ppa_addr bppa;
- struct semaphore wr_sem;
-};
-
-struct pblk_gc_rq {
- struct pblk_line *line;
- void *data;
- u64 paddr_list[NVM_MAX_VLBA];
- u64 lba_list[NVM_MAX_VLBA];
- int nr_secs;
- int secs_to_gc;
- struct list_head list;
-};
-
-struct pblk_gc {
- /* These states are not protected by a lock since (i) they are in the
- * fast path, and (ii) they are not critical.
- */
- int gc_active;
- int gc_enabled;
- int gc_forced;
-
- struct task_struct *gc_ts;
- struct task_struct *gc_writer_ts;
- struct task_struct *gc_reader_ts;
-
- struct workqueue_struct *gc_line_reader_wq;
- struct workqueue_struct *gc_reader_wq;
-
- struct timer_list gc_timer;
-
- struct semaphore gc_sem;
- atomic_t read_inflight_gc; /* Number of lines with inflight GC reads */
- atomic_t pipeline_gc; /* Number of lines in the GC pipeline -
- * started reads to finished writes
- */
- int w_entries;
-
- struct list_head w_list;
- struct list_head r_list;
-
- spinlock_t lock;
- spinlock_t w_lock;
- spinlock_t r_lock;
-};
-
-struct pblk_rl {
- unsigned int high; /* Upper threshold for rate limiter (free run -
- * user I/O rate limiter
- */
- unsigned int high_pw; /* High rounded up as a power of 2 */
-
-#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
-#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
-
- int rb_windows_pw; /* Number of rate windows in the write buffer
- * given as a power-of-2. This guarantees that
- * when user I/O is being rate limited, there
- * will be reserved enough space for the GC to
- * place its payload. A window is of
- * pblk->max_write_pgs size, which in NVMe is
- * 64, i.e., 256kb.
- */
- int rb_budget; /* Total number of entries available for I/O */
- int rb_user_max; /* Max buffer entries available for user I/O */
- int rb_gc_max; /* Max buffer entries available for GC I/O */
- int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
- int rb_state; /* Rate-limiter current state */
- int rb_max_io; /* Maximum size for an I/O giving the config */
-
- atomic_t rb_user_cnt; /* User I/O buffer counter */
- atomic_t rb_gc_cnt; /* GC I/O buffer counter */
- atomic_t rb_space; /* Space limit in case of reaching capacity */
-
- int rsv_blocks; /* Reserved blocks for GC */
-
- int rb_user_active;
- int rb_gc_active;
-
- atomic_t werr_lines; /* Number of write error lines that needs gc */
-
- struct timer_list u_timer;
-
- unsigned long total_blocks;
-
- atomic_t free_blocks; /* Total number of free blocks (+ OP) */
- atomic_t free_user_blocks; /* Number of user free blocks (no OP) */
-};
-
-#define PBLK_LINE_EMPTY (~0U)
-
-enum {
- /* Line Types */
- PBLK_LINETYPE_FREE = 0,
- PBLK_LINETYPE_LOG = 1,
- PBLK_LINETYPE_DATA = 2,
-
- /* Line state */
- PBLK_LINESTATE_NEW = 9,
- PBLK_LINESTATE_FREE = 10,
- PBLK_LINESTATE_OPEN = 11,
- PBLK_LINESTATE_CLOSED = 12,
- PBLK_LINESTATE_GC = 13,
- PBLK_LINESTATE_BAD = 14,
- PBLK_LINESTATE_CORRUPT = 15,
-
- /* GC group */
- PBLK_LINEGC_NONE = 20,
- PBLK_LINEGC_EMPTY = 21,
- PBLK_LINEGC_LOW = 22,
- PBLK_LINEGC_MID = 23,
- PBLK_LINEGC_HIGH = 24,
- PBLK_LINEGC_FULL = 25,
- PBLK_LINEGC_WERR = 26
-};
-
-#define PBLK_MAGIC 0x70626c6b /*pblk*/
-
-/* emeta/smeta persistent storage format versions:
- * Changes in major version requires offline migration.
- * Changes in minor version are handled automatically during
- * recovery.
- */
-
-#define SMETA_VERSION_MAJOR (0)
-#define SMETA_VERSION_MINOR (1)
-
-#define EMETA_VERSION_MAJOR (0)
-#define EMETA_VERSION_MINOR (2)
-
-struct line_header {
- __le32 crc;
- __le32 identifier; /* pblk identifier */
- __u8 uuid[16]; /* instance uuid */
- __le16 type; /* line type */
- __u8 version_major; /* version major */
- __u8 version_minor; /* version minor */
- __le32 id; /* line id for current line */
-};
-
-struct line_smeta {
- struct line_header header;
-
- __le32 crc; /* Full structure including struct crc */
- /* Previous line metadata */
- __le32 prev_id; /* Line id for previous line */
-
- /* Current line metadata */
- __le64 seq_nr; /* Sequence number for current line */
-
- /* Active writers */
- __le32 window_wr_lun; /* Number of parallel LUNs to write */
-
- __le32 rsvd[2];
-
- __le64 lun_bitmap[];
-};
-
-
-/*
- * Metadata layout in media:
- * First sector:
- * 1. struct line_emeta
- * 2. bad block bitmap (u64 * window_wr_lun)
- * 3. write amplification counters
- * Mid sectors (start at lbas_sector):
- * 3. nr_lbas (u64) forming lba list
- * Last sectors (start at vsc_sector):
- * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
- */
-struct line_emeta {
- struct line_header header;
-
- __le32 crc; /* Full structure including struct crc */
-
- /* Previous line metadata */
- __le32 prev_id; /* Line id for prev line */
-
- /* Current line metadata */
- __le64 seq_nr; /* Sequence number for current line */
-
- /* Active writers */
- __le32 window_wr_lun; /* Number of parallel LUNs to write */
-
- /* Bookkeeping for recovery */
- __le32 next_id; /* Line id for next line */
- __le64 nr_lbas; /* Number of lbas mapped in line */
- __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
- __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
-};
-
-
-/* Write amplification counters stored on media */
-struct wa_counters {
- __le64 user; /* Number of user written sectors */
- __le64 gc; /* Number of sectors written by GC*/
- __le64 pad; /* Number of padded sectors */
-};
-
-struct pblk_emeta {
- struct line_emeta *buf; /* emeta buffer in media format */
- int mem; /* Write offset - points to next
- * writable entry in memory
- */
- atomic_t sync; /* Synced - backpointer that signals the
- * last entry that has been successfully
- * persisted to media
- */
- unsigned int nr_entries; /* Number of emeta entries */
-};
-
-struct pblk_smeta {
- struct line_smeta *buf; /* smeta buffer in persistent format */
-};
-
-struct pblk_w_err_gc {
- int has_write_err;
- int has_gc_err;
- __le64 *lba_list;
-};
-
-struct pblk_line {
- struct pblk *pblk;
- unsigned int id; /* Line number corresponds to the
- * block line
- */
- unsigned int seq_nr; /* Unique line sequence number */
-
- int state; /* PBLK_LINESTATE_X */
- int type; /* PBLK_LINETYPE_X */
- int gc_group; /* PBLK_LINEGC_X */
- struct list_head list; /* Free, GC lists */
-
- unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
-
- struct nvm_chk_meta *chks; /* Chunks forming line */
-
- struct pblk_smeta *smeta; /* Start metadata */
- struct pblk_emeta *emeta; /* End medatada */
-
- int meta_line; /* Metadata line id */
- int meta_distance; /* Distance between data and metadata */
-
- u64 emeta_ssec; /* Sector where emeta starts */
-
- unsigned int sec_in_line; /* Number of usable secs in line */
-
- atomic_t blk_in_line; /* Number of good blocks in line */
- unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */
- unsigned long *erase_bitmap; /* Bitmap for erased blocks */
-
- unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */
- unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */
-
- atomic_t left_eblks; /* Blocks left for erasing */
- atomic_t left_seblks; /* Blocks left for sync erasing */
-
- int left_msecs; /* Sectors left for mapping */
- unsigned int cur_sec; /* Sector map pointer */
- unsigned int nr_valid_lbas; /* Number of valid lbas in line */
-
- __le32 *vsc; /* Valid sector count in line */
-
- struct kref ref; /* Write buffer L2P references */
- atomic_t sec_to_update; /* Outstanding L2P updates to ppa */
-
- struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */
-
- spinlock_t lock; /* Necessary for invalid_bitmap only */
-};
-
-#define PBLK_DATA_LINES 4
-
-enum {
- PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
- PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
- PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
-};
-
-struct pblk_line_mgmt {
- int nr_lines; /* Total number of full lines */
- int nr_free_lines; /* Number of full lines in free list */
-
- /* Free lists - use free_lock */
- struct list_head free_list; /* Full lines ready to use */
- struct list_head corrupt_list; /* Full lines corrupted */
- struct list_head bad_list; /* Full lines bad */
-
- /* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_GC_NR_LISTS];
- struct list_head gc_high_list; /* Full lines ready to GC, high isc */
- struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
- struct list_head gc_low_list; /* Full lines ready to GC, low isc */
-
- struct list_head gc_werr_list; /* Write err recovery list */
-
- struct list_head gc_full_list; /* Full lines ready to GC, no valid */
- struct list_head gc_empty_list; /* Full lines close, all valid */
-
- struct pblk_line *log_line; /* Current FTL log line */
- struct pblk_line *data_line; /* Current data line */
- struct pblk_line *log_next; /* Next FTL log line */
- struct pblk_line *data_next; /* Next data line */
-
- struct list_head emeta_list; /* Lines queued to schedule emeta */
-
- __le32 *vsc_list; /* Valid sector counts for all lines */
-
- /* Pre-allocated metadata for data lines */
- struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
- struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
- unsigned long meta_bitmap;
-
- /* Cache and mempool for map/invalid bitmaps */
- struct kmem_cache *bitmap_cache;
- mempool_t *bitmap_pool;
-
- /* Helpers for fast bitmap calculations */
- unsigned long *bb_template;
- unsigned long *bb_aux;
-
- unsigned long d_seq_nr; /* Data line unique sequence number */
- unsigned long l_seq_nr; /* Log line unique sequence number */
-
- spinlock_t free_lock;
- spinlock_t close_lock;
- spinlock_t gc_lock;
-};
-
-struct pblk_line_meta {
- unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta */
-
- unsigned int emeta_len[4]; /* Lengths for emeta:
- * [0]: Total
- * [1]: struct line_emeta +
- * bb_bitmap + struct wa_counters
- * [2]: L2P portion
- * [3]: vsc
- */
- unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
- * as emeta_len
- */
-
- unsigned int emeta_bb; /* Boundary for bb that affects emeta */
-
- unsigned int vsc_list_len; /* Length for vsc list */
- unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
- unsigned int blk_bitmap_len; /* Length for block bitmap in line */
- unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
-
- unsigned int blk_per_line; /* Number of blocks in a full line */
- unsigned int sec_per_line; /* Number of sectors in a line */
- unsigned int dsec_per_line; /* Number of data sectors in a line */
- unsigned int min_blk_line; /* Min. number of good blocks in line */
-
- unsigned int mid_thrs; /* Threshold for GC mid list */
- unsigned int high_thrs; /* Threshold for GC high list */
-
- unsigned int meta_distance; /* Distance between data and metadata */
-};
-
-enum {
- PBLK_STATE_RUNNING = 0,
- PBLK_STATE_STOPPING = 1,
- PBLK_STATE_RECOVERING = 2,
- PBLK_STATE_STOPPED = 3,
-};
-
-/* Internal format to support not power-of-2 device formats */
-struct pblk_addrf {
- /* gen to dev */
- int sec_stripe;
- int ch_stripe;
- int lun_stripe;
-
- /* dev to gen */
- int sec_lun_stripe;
- int sec_ws_stripe;
-};
-
-struct pblk {
- struct nvm_tgt_dev *dev;
- struct gendisk *disk;
-
- struct kobject kobj;
-
- struct pblk_lun *luns;
-
- struct pblk_line *lines; /* Line array */
- struct pblk_line_mgmt l_mg; /* Line management */
- struct pblk_line_meta lm; /* Line metadata */
-
- struct nvm_addrf addrf; /* Aligned address format */
- struct pblk_addrf uaddrf; /* Unaligned address format */
- int addrf_len;
-
- struct pblk_rb rwb;
-
- int state; /* pblk line state */
-
- int min_write_pgs; /* Minimum amount of pages required by controller */
- int min_write_pgs_data; /* Minimum amount of payload pages */
- int max_write_pgs; /* Maximum amount of pages supported by controller */
- int oob_meta_size; /* Size of OOB sector metadata */
-
- sector_t capacity; /* Device capacity when bad blocks are subtracted */
-
- int op; /* Percentage of device used for over-provisioning */
- int op_blks; /* Number of blocks used for over-provisioning */
-
- /* pblk provisioning values. Used by rate limiter */
- struct pblk_rl rl;
-
- int sec_per_write;
-
- guid_t instance_uuid;
-
- /* Persistent write amplification counters, 4kb sector I/Os */
- atomic64_t user_wa; /* Sectors written by user */
- atomic64_t gc_wa; /* Sectors written by GC */
- atomic64_t pad_wa; /* Padded sectors written */
-
- /* Reset values for delta write amplification measurements */
- u64 user_rst_wa;
- u64 gc_rst_wa;
- u64 pad_rst_wa;
-
- /* Counters used for calculating padding distribution */
- atomic64_t *pad_dist; /* Padding distribution buckets */
- u64 nr_flush_rst; /* Flushes reset value for pad dist.*/
- atomic64_t nr_flush; /* Number of flush/fua I/O */
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
- /* Non-persistent debug counters, 4kb sector I/Os */
- atomic_long_t inflight_writes; /* Inflight writes (user and gc) */
- atomic_long_t padded_writes; /* Sectors padded due to flush/fua */
- atomic_long_t padded_wb; /* Sectors padded in write buffer */
- atomic_long_t req_writes; /* Sectors stored on write buffer */
- atomic_long_t sub_writes; /* Sectors submitted from buffer */
- atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t inflight_reads; /* Inflight sector read requests */
- atomic_long_t cache_reads; /* Read requests that hit the cache */
- atomic_long_t sync_reads; /* Completed sector read requests */
- atomic_long_t recov_writes; /* Sectors submitted from recovery */
- atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
- atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */
-#endif
-
- spinlock_t lock;
-
- atomic_long_t read_failed;
- atomic_long_t read_empty;
- atomic_long_t read_high_ecc;
- atomic_long_t read_failed_gc;
- atomic_long_t write_failed;
- atomic_long_t erase_failed;
-
- atomic_t inflight_io; /* General inflight I/O counter */
-
- struct task_struct *writer_ts;
-
- /* Simple translation map of logical addresses to physical addresses.
- * The logical addresses is known by the host system, while the physical
- * addresses are used when writing to the disk block device.
- */
- unsigned char *trans_map;
- spinlock_t trans_lock;
-
- struct list_head compl_list;
-
- spinlock_t resubmit_lock; /* Resubmit list lock */
- struct list_head resubmit_list; /* Resubmit list for failed writes*/
-
- mempool_t page_bio_pool;
- mempool_t gen_ws_pool;
- mempool_t rec_pool;
- mempool_t r_rq_pool;
- mempool_t w_rq_pool;
- mempool_t e_rq_pool;
-
- struct workqueue_struct *close_wq;
- struct workqueue_struct *bb_wq;
- struct workqueue_struct *r_end_wq;
-
- struct timer_list wtimer;
-
- struct pblk_gc gc;
-};
-
-struct pblk_line_ws {
- struct pblk *pblk;
- struct pblk_line *line;
- void *priv;
- struct work_struct ws;
-};
-
-#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
-#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
-
-#define pblk_err(pblk, fmt, ...) \
- pr_err("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-#define pblk_info(pblk, fmt, ...) \
- pr_info("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-#define pblk_warn(pblk, fmt, ...) \
- pr_warn("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-#define pblk_debug(pblk, fmt, ...) \
- pr_debug("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__)
-
-/*
- * pblk ring buffer operations
- */
-int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
- unsigned int seg_sz);
-int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
- unsigned int nr_entries, unsigned int *pos);
-int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
- unsigned int *pos);
-void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, unsigned int pos);
-void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
- struct pblk_w_ctx w_ctx, struct pblk_line *line,
- u64 paddr, unsigned int pos);
-struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
-void pblk_rb_flush(struct pblk_rb *rb);
-
-void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
- unsigned int pos, unsigned int nr_entries,
- unsigned int count);
-int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- struct ppa_addr ppa);
-unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
-
-unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
-unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
-unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
- unsigned int nr_entries);
-void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
-unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb);
-
-unsigned int pblk_rb_read_count(struct pblk_rb *rb);
-unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
-unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
-
-int pblk_rb_tear_down_check(struct pblk_rb *rb);
-int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos);
-void pblk_rb_free(struct pblk_rb *rb);
-ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
-
-/*
- * pblk core
- */
-struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type);
-void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type);
-int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
-int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx);
-void pblk_discard(struct pblk *pblk, struct bio *bio);
-struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk);
-struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
- struct nvm_chk_meta *lp,
- struct ppa_addr ppa);
-void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf);
-int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf);
-int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
-void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd);
-struct pblk_line *pblk_line_get(struct pblk *pblk);
-struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
-void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa);
-void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
-struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
-int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_is_full(struct pblk_line *line);
-void pblk_line_free(struct pblk_line *line);
-void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
-void pblk_pipeline_stop(struct pblk *pblk);
-void __pblk_pipeline_stop(struct pblk *pblk);
-void __pblk_pipeline_flush(struct pblk *pblk);
-void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *), gfp_t gfp_mask,
- struct workqueue_struct *wq);
-u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
- void *emeta_buf);
-int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
-void pblk_line_put(struct kref *ref);
-void pblk_line_put_wq(struct kref *ref);
-struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
-u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
-void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
- unsigned long secs_to_flush, bool skip_meta);
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
- unsigned long *lun_bitmap);
-void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa);
-void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa);
-void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap);
-int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
- int nr_pages);
-void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
- int nr_pages);
-void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
-void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
-void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
-void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa);
-void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa, struct ppa_addr entry_line);
-int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
- struct pblk_line *gc_line, u64 paddr);
-void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
- u64 *lba_list, int nr_secs);
-int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
- sector_t blba, int nr_secs, bool *from_cache);
-void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
-void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
-
-/*
- * pblk user I/O write path
- */
-void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
- unsigned long flags);
-int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
-
-/*
- * pblk map
- */
-int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int sentry, unsigned long *lun_bitmap,
- unsigned int valid_secs, struct ppa_addr *erase_ppa);
-int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
- unsigned long *lun_bitmap, unsigned int valid_secs,
- unsigned int off);
-
-/*
- * pblk write thread
- */
-int pblk_write_ts(void *data);
-void pblk_write_timer_fn(struct timer_list *t);
-void pblk_write_should_kick(struct pblk *pblk);
-void pblk_write_kick(struct pblk *pblk);
-
-/*
- * pblk read path
- */
-extern struct bio_set pblk_bio_set;
-void pblk_submit_read(struct pblk *pblk, struct bio *bio);
-int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
-/*
- * pblk recovery
- */
-struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-int pblk_recov_pad(struct pblk *pblk);
-int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
-
-/*
- * pblk gc
- */
-#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
-#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */
-#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
-
-int pblk_gc_init(struct pblk *pblk);
-void pblk_gc_exit(struct pblk *pblk, bool graceful);
-void pblk_gc_should_start(struct pblk *pblk);
-void pblk_gc_should_stop(struct pblk *pblk);
-void pblk_gc_should_kick(struct pblk *pblk);
-void pblk_gc_free_full_lines(struct pblk *pblk);
-void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
- int *gc_active);
-int pblk_gc_sysfs_force(struct pblk *pblk, int force);
-void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line);
-
-/*
- * pblk rate limiter
- */
-void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold);
-void pblk_rl_free(struct pblk_rl *rl);
-void pblk_rl_update_rates(struct pblk_rl *rl);
-int pblk_rl_high_thrs(struct pblk_rl *rl);
-unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
-unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl);
-int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
-int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
-void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-int pblk_rl_max_io(struct pblk_rl *rl);
-void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
- bool used);
-int pblk_rl_is_limit(struct pblk_rl *rl);
-
-void pblk_rl_werr_line_in(struct pblk_rl *rl);
-void pblk_rl_werr_line_out(struct pblk_rl *rl);
-
-/*
- * pblk sysfs
- */
-int pblk_sysfs_init(struct gendisk *tdisk);
-void pblk_sysfs_exit(struct gendisk *tdisk);
-
-static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
-{
- return c_ctx - sizeof(struct nvm_rq);
-}
-
-static inline void *emeta_to_bb(struct line_emeta *emeta)
-{
- return emeta->bb_bitmap;
-}
-
-static inline void *emeta_to_wa(struct pblk_line_meta *lm,
- struct line_emeta *emeta)
-{
- return emeta->bb_bitmap + lm->blk_bitmap_len;
-}
-
-static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
-{
- return ((void *)emeta + pblk->lm.emeta_len[1]);
-}
-
-static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
-{
- return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
-}
-
-static inline int pblk_line_vsc(struct pblk_line *line)
-{
- return le32_to_cpu(*line->vsc);
-}
-
-static inline int pblk_ppa_to_line_id(struct ppa_addr p)
-{
- return p.a.blk;
-}
-
-static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk,
- struct ppa_addr p)
-{
- return &pblk->lines[pblk_ppa_to_line_id(p)];
-}
-
-static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
-{
- return p.a.lun * geo->num_ch + p.a.ch;
-}
-
-static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
- u64 line_id)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct ppa_addr ppa;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
-
- ppa.ppa = 0;
- ppa.g.blk = line_id;
- ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset;
- ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset;
- ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset;
- ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset;
- ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset;
- } else {
- struct pblk_addrf *uaddrf = &pblk->uaddrf;
- int secs, chnls, luns;
-
- ppa.ppa = 0;
-
- ppa.m.chk = line_id;
-
- paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs);
- ppa.m.sec = secs;
-
- paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls);
- ppa.m.grp = chnls;
-
- paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns);
- ppa.m.pu = luns;
-
- ppa.m.sec += uaddrf->sec_stripe * paddr;
- }
-
- return ppa;
-}
-
-static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk,
- struct ppa_addr p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line *line = pblk_ppa_to_line(pblk, p);
- int pos = pblk_ppa_to_pos(geo, p);
-
- return &line->chks[pos];
-}
-
-static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk,
- struct ppa_addr p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- return dev_to_chunk_addr(dev->parent, &pblk->addrf, p);
-}
-
-static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
- struct ppa_addr p)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- u64 paddr;
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
-
- paddr = (u64)p.g.ch << ppaf->ch_offset;
- paddr |= (u64)p.g.lun << ppaf->lun_offset;
- paddr |= (u64)p.g.pg << ppaf->pg_offset;
- paddr |= (u64)p.g.pl << ppaf->pln_offset;
- paddr |= (u64)p.g.sec << ppaf->sec_offset;
- } else {
- struct pblk_addrf *uaddrf = &pblk->uaddrf;
- u64 secs = p.m.sec;
- int sec_stripe;
-
- paddr = (u64)p.m.grp * uaddrf->sec_stripe;
- paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe;
-
- secs = div_u64_rem(secs, uaddrf->sec_stripe, &sec_stripe);
- paddr += secs * uaddrf->sec_ws_stripe;
- paddr += sec_stripe;
- }
-
- return paddr;
-}
-
-static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32);
-}
-
-static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
-
- return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64);
-}
-
-static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
- sector_t lba)
-{
- struct ppa_addr ppa;
-
- if (pblk->addrf_len < 32) {
- u32 *map = (u32 *)pblk->trans_map;
-
- ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
- } else {
- struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map;
-
- ppa = map[lba];
- }
-
- return ppa;
-}
-
-static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
- struct ppa_addr ppa)
-{
- if (pblk->addrf_len < 32) {
- u32 *map = (u32 *)pblk->trans_map;
-
- map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
- } else {
- u64 *map = (u64 *)pblk->trans_map;
-
- map[lba] = ppa.ppa;
- }
-}
-
-static inline int pblk_ppa_empty(struct ppa_addr ppa_addr)
-{
- return (ppa_addr.ppa == ADDR_EMPTY);
-}
-
-static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
-{
- ppa_addr->ppa = ADDR_EMPTY;
-}
-
-static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
-{
- return (lppa.ppa == rppa.ppa);
-}
-
-static inline int pblk_addr_in_cache(struct ppa_addr ppa)
-{
- return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
-}
-
-static inline int pblk_addr_to_cacheline(struct ppa_addr ppa)
-{
- return ppa.c.line;
-}
-
-static inline struct ppa_addr pblk_cacheline_to_addr(int addr)
-{
- struct ppa_addr p;
-
- p.c.line = addr;
- p.c.is_cached = 1;
-
- return p;
-}
-
-static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_header *header)
-{
- u32 crc = ~(u32)0;
-
- crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
- sizeof(struct line_header) - sizeof(crc));
-
- return crc;
-}
-
-static inline u32 pblk_calc_smeta_crc(struct pblk *pblk,
- struct line_smeta *smeta)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- u32 crc = ~(u32)0;
-
- crc = crc32_le(crc, (unsigned char *)smeta +
- sizeof(struct line_header) + sizeof(crc),
- lm->smeta_len -
- sizeof(struct line_header) - sizeof(crc));
-
- return crc;
-}
-
-static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
- struct line_emeta *emeta)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- u32 crc = ~(u32)0;
-
- crc = crc32_le(crc, (unsigned char *)emeta +
- sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len[0] -
- sizeof(struct line_header) - sizeof(crc));
-
- return crc;
-}
-
-static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
-{
- return !(nr_secs % pblk->min_write_pgs);
-}
-
-#ifdef CONFIG_NVM_PBLK_DEBUG
-static inline void print_ppa(struct pblk *pblk, struct ppa_addr *p,
- char *msg, int error)
-{
- struct nvm_geo *geo = &pblk->dev->geo;
-
- if (p->c.is_cached) {
- pblk_err(pblk, "ppa: (%s: %x) cache line: %llu\n",
- msg, error, (u64)p->c.line);
- } else if (geo->version == NVM_OCSSD_SPEC_12) {
- pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
- msg, error,
- p->g.ch, p->g.lun, p->g.blk,
- p->g.pg, p->g.pl, p->g.sec);
- } else {
- pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n",
- msg, error,
- p->m.grp, p->m.pu, p->m.chk, p->m.sec);
- }
-}
-
-static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
- int error)
-{
- int bit = -1;
-
- if (rqd->nr_ppas == 1) {
- print_ppa(pblk, &rqd->ppa_addr, "rqd", error);
- return;
- }
-
- while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
- bit + 1)) < rqd->nr_ppas) {
- print_ppa(pblk, &rqd->ppa_list[bit], "rqd", error);
- }
-
- pblk_err(pblk, "error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
-}
-
-static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
- struct ppa_addr *ppas, int nr_ppas)
-{
- struct nvm_geo *geo = &tgt_dev->geo;
- struct ppa_addr *ppa;
- int i;
-
- for (i = 0; i < nr_ppas; i++) {
- ppa = &ppas[i];
-
- if (geo->version == NVM_OCSSD_SPEC_12) {
- if (!ppa->c.is_cached &&
- ppa->g.ch < geo->num_ch &&
- ppa->g.lun < geo->num_lun &&
- ppa->g.pl < geo->num_pln &&
- ppa->g.blk < geo->num_chk &&
- ppa->g.pg < geo->num_pg &&
- ppa->g.sec < geo->ws_min)
- continue;
- } else {
- if (!ppa->c.is_cached &&
- ppa->m.grp < geo->num_ch &&
- ppa->m.pu < geo->num_lun &&
- ppa->m.chk < geo->num_chk &&
- ppa->m.sec < geo->clba)
- continue;
- }
-
- print_ppa(tgt_dev->q->queuedata, ppa, "boundary", i);
-
- return 1;
- }
- return 0;
-}
-
-static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
-
- if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
- WARN_ON(1);
- return -EINVAL;
- }
-
- if (rqd->opcode == NVM_OP_PWRITE) {
- struct pblk_line *line;
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- line = pblk_ppa_to_line(pblk, ppa_list[i]);
-
- spin_lock(&line->lock);
- if (line->state != PBLK_LINESTATE_OPEN) {
- pblk_err(pblk, "bad ppa: line:%d,state:%d\n",
- line->id, line->state);
- WARN_ON(1);
- spin_unlock(&line->lock);
- return -EINVAL;
- }
- spin_unlock(&line->lock);
- }
- }
-
- return 0;
-}
-#endif
-
-static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr)
-{
- struct pblk_line_meta *lm = &pblk->lm;
-
- if (paddr > lm->sec_per_line)
- return 1;
-
- return 0;
-}
-
-static inline unsigned int pblk_get_bi_idx(struct bio *bio)
-{
- return bio->bi_iter.bi_idx;
-}
-
-static inline sector_t pblk_get_lba(struct bio *bio)
-{
- return bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
-}
-
-static inline unsigned int pblk_get_secs(struct bio *bio)
-{
- return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE;
-}
-
-static inline char *pblk_disk_name(struct pblk *pblk)
-{
- struct gendisk *disk = pblk->disk;
-
- return disk->disk_name;
-}
-
-static inline unsigned int pblk_get_min_chks(struct pblk *pblk)
-{
- struct pblk_line_meta *lm = &pblk->lm;
- /* In a worst-case scenario every line will have OP invalid sectors.
- * We will then need a minimum of 1/OP lines to free up a single line
- */
-
- return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line;
-}
-
-static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk,
- void *meta, int index)
-{
- return meta +
- max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size)
- * index;
-}
-
-static inline int pblk_dma_meta_size(struct pblk *pblk)
-{
- return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size)
- * NVM_MAX_VLBA;
-}
-
-static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
-{
- return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta);
-}
-#endif /* PBLK_H_ */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3c44c4bb40fc..19598bd38939 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1329,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct raid1_plug_cb *plug = NULL;
int first_clone;
int max_sectors;
+ bool write_behind = false;
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1381,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
max_sectors = r1_bio->sectors;
for (i = 0; i < disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+
+ /*
+ * The write-behind io is only attempted on drives marked as
+ * write-mostly, which means we could allocate write behind
+ * bio later.
+ */
+ if (rdev && test_bit(WriteMostly, &rdev->flags))
+ write_behind = true;
+
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev;
@@ -1454,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
goto retry_write;
}
+ /*
+ * When using a bitmap, we may call alloc_behind_master_bio below.
+ * alloc_behind_master_bio allocates a copy of the data payload a page
+ * at a time and thus needs a new bio that can fit the whole payload
+ * this bio in page sized chunks.
+ */
+ if (write_behind && bitmap)
+ max_sectors = min_t(int, max_sectors,
+ BIO_MAX_VECS * (PAGE_SIZE >> 9));
if (max_sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, max_sectors,
GFP_NOIO, &conf->bio_split);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 07119d7e0fdf..aa2636582841 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1712,6 +1712,11 @@ retry_discard:
} else
r10_bio->master_bio = (struct bio *)first_r10bio;
+ /*
+ * first select target devices under rcu_lock and
+ * inc refcount on their rdev. Record them by setting
+ * bios[x] to bio
+ */
rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1743,9 +1748,6 @@ retry_discard:
for (disk = 0; disk < geo->raid_disks; disk++) {
sector_t dev_start, dev_end;
struct bio *mbio, *rbio = NULL;
- struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
- struct md_rdev *rrdev = rcu_dereference(
- conf->mirrors[disk].replacement);
/*
* Now start to calculate the start and end address for each disk.
@@ -1775,9 +1777,12 @@ retry_discard:
/*
* It only handles discard bio which size is >= stripe size, so
- * dev_end > dev_start all the time
+ * dev_end > dev_start all the time.
+ * The rcu lock is not needed to get rdev here: we already
+ * incremented rdev->nr_pending in the first loop.
*/
if (r10_bio->devs[disk].bio) {
+ struct md_rdev *rdev = conf->mirrors[disk].rdev;
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
mbio->bi_end_io = raid10_end_discard_request;
mbio->bi_private = r10_bio;
@@ -1790,6 +1795,7 @@ retry_discard:
bio_endio(mbio);
}
if (r10_bio->devs[disk].repl_bio) {
+ struct md_rdev *rrdev = conf->mirrors[disk].replacement;
rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
rbio->bi_end_io = raid10_end_discard_request;
rbio->bi_private = r10_bio;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index c3f3d77f1aac..dc0450ca23a3 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -33,12 +33,12 @@ config NVME_HWMON
in the system.
config NVME_FABRICS
+ select NVME_CORE
tristate
config NVME_RDMA
tristate "NVM Express over Fabrics RDMA host driver"
depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
@@ -55,7 +55,6 @@ config NVME_FC
tristate "NVM Express over Fabrics FC host driver"
depends on BLOCK
depends on HAS_DMA
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
@@ -72,7 +71,6 @@ config NVME_TCP
tristate "NVM Express over Fabrics TCP host driver"
depends on INET
depends on BLOCK
- select NVME_CORE
select NVME_FABRICS
select CRYPTO
select CRYPTO_CRC32C
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index cbc509784b2e..dfaacd472e5d 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
nvme-core-y := core.o ioctl.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
-nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 68acd33c3856..8679a108f571 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref)
{
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
- if (ns->ndev)
- nvme_nvm_unregister(ns);
-
put_disk(ns->disk);
nvme_put_ns_head(ns->head);
nvme_put_ctrl(ns->ctrl);
@@ -1028,7 +1025,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
return BLK_STS_IOERR;
}
- cmd->common.command_id = req->tag;
+ nvme_req(req)->genctr++;
+ cmd->common.command_id = nvme_cid(req);
trace_nvme_setup_cmd(req, cmd);
return ret;
}
@@ -3217,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = {
const struct attribute_group *nvme_ns_id_attr_groups[] = {
&nvme_ns_id_attr_group,
-#ifdef CONFIG_NVM
- &nvme_nvm_attr_group,
-#endif
NULL,
};
@@ -3762,13 +3757,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (nvme_update_ns_info(ns, id))
goto out_unlink_ns;
- if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
- if (nvme_nvm_register(ns, disk->disk_name, node)) {
- dev_warn(ctrl->device, "LightNVM init failure\n");
- goto out_unlink_ns;
- }
- }
-
down_write(&ctrl->namespaces_rwsem);
list_add_tail(&ns->list, &ctrl->namespaces);
up_write(&ctrl->namespaces_rwsem);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index a5469fd9d4c3..668c6bb7a567 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -719,7 +719,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
- nvmf_host_put(opts->host);
opts->host = nvmf_host_add(p);
kfree(p);
if (!opts->host) {
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 305ddd415e45..22314962842d 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
case NVME_IOCTL_IO64_CMD:
return nvme_user_cmd64(ns->ctrl, ns, argp);
default:
- if (!ns->ndev)
- return -ENOTTY;
- return nvme_nvm_ioctl(ns, cmd, argp);
+ return -ENOTTY;
}
}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
deleted file mode 100644
index e9d9ad47f70f..000000000000
--- a/drivers/nvme/host/lightnvm.c
+++ /dev/null
@@ -1,1274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * nvme-lightnvm.c - LightNVM NVMe device
- *
- * Copyright (C) 2014-2015 IT University of Copenhagen
- * Initial release: Matias Bjorling <mb@lightnvm.io>
- */
-
-#include "nvme.h"
-
-#include <linux/nvme.h>
-#include <linux/bitops.h>
-#include <linux/lightnvm.h>
-#include <linux/vmalloc.h>
-#include <linux/sched/sysctl.h>
-#include <uapi/linux/lightnvm.h>
-
-enum nvme_nvm_admin_opcode {
- nvme_nvm_admin_identity = 0xe2,
- nvme_nvm_admin_get_bb_tbl = 0xf2,
- nvme_nvm_admin_set_bb_tbl = 0xf1,
-};
-
-enum nvme_nvm_log_page {
- NVME_NVM_LOG_REPORT_CHUNK = 0xca,
-};
-
-struct nvme_nvm_ph_rw {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd2;
- __le64 metadata;
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 resv;
-};
-
-struct nvme_nvm_erase_blk {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 resv;
-};
-
-struct nvme_nvm_identity {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __u32 rsvd11[6];
-};
-
-struct nvme_nvm_getbbtbl {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __u32 rsvd4[4];
-};
-
-struct nvme_nvm_setbbtbl {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __le64 rsvd[2];
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 nlb;
- __u8 value;
- __u8 rsvd3;
- __u32 rsvd4[3];
-};
-
-struct nvme_nvm_command {
- union {
- struct nvme_common_command common;
- struct nvme_nvm_ph_rw ph_rw;
- struct nvme_nvm_erase_blk erase;
- struct nvme_nvm_identity identity;
- struct nvme_nvm_getbbtbl get_bb;
- struct nvme_nvm_setbbtbl set_bb;
- };
-};
-
-struct nvme_nvm_id12_grp {
- __u8 mtype;
- __u8 fmtype;
- __le16 res16;
- __u8 num_ch;
- __u8 num_lun;
- __u8 num_pln;
- __u8 rsvd1;
- __le16 num_chk;
- __le16 num_pg;
- __le16 fpg_sz;
- __le16 csecs;
- __le16 sos;
- __le16 rsvd2;
- __le32 trdt;
- __le32 trdm;
- __le32 tprt;
- __le32 tprm;
- __le32 tbet;
- __le32 tbem;
- __le32 mpos;
- __le32 mccap;
- __le16 cpar;
- __u8 reserved[906];
-} __packed;
-
-struct nvme_nvm_id12_addrf {
- __u8 ch_offset;
- __u8 ch_len;
- __u8 lun_offset;
- __u8 lun_len;
- __u8 pln_offset;
- __u8 pln_len;
- __u8 blk_offset;
- __u8 blk_len;
- __u8 pg_offset;
- __u8 pg_len;
- __u8 sec_offset;
- __u8 sec_len;
- __u8 res[4];
-} __packed;
-
-struct nvme_nvm_id12 {
- __u8 ver_id;
- __u8 vmnt;
- __u8 cgrps;
- __u8 res;
- __le32 cap;
- __le32 dom;
- struct nvme_nvm_id12_addrf ppaf;
- __u8 resv[228];
- struct nvme_nvm_id12_grp grp;
- __u8 resv2[2880];
-} __packed;
-
-struct nvme_nvm_bb_tbl {
- __u8 tblid[4];
- __le16 verid;
- __le16 revid;
- __le32 rvsd1;
- __le32 tblks;
- __le32 tfact;
- __le32 tgrown;
- __le32 tdresv;
- __le32 thresv;
- __le32 rsvd2[8];
- __u8 blk[];
-};
-
-struct nvme_nvm_id20_addrf {
- __u8 grp_len;
- __u8 pu_len;
- __u8 chk_len;
- __u8 lba_len;
- __u8 resv[4];
-};
-
-struct nvme_nvm_id20 {
- __u8 mjr;
- __u8 mnr;
- __u8 resv[6];
-
- struct nvme_nvm_id20_addrf lbaf;
-
- __le32 mccap;
- __u8 resv2[12];
-
- __u8 wit;
- __u8 resv3[31];
-
- /* Geometry */
- __le16 num_grp;
- __le16 num_pu;
- __le32 num_chk;
- __le32 clba;
- __u8 resv4[52];
-
- /* Write data requirements */
- __le32 ws_min;
- __le32 ws_opt;
- __le32 mw_cunits;
- __le32 maxoc;
- __le32 maxocpu;
- __u8 resv5[44];
-
- /* Performance related metrics */
- __le32 trdt;
- __le32 trdm;
- __le32 twrt;
- __le32 twrm;
- __le32 tcrst;
- __le32 tcrsm;
- __u8 resv6[40];
-
- /* Reserved area */
- __u8 resv7[2816];
-
- /* Vendor specific */
- __u8 vs[1024];
-};
-
-struct nvme_nvm_chk_meta {
- __u8 state;
- __u8 type;
- __u8 wi;
- __u8 rsvd[5];
- __le64 slba;
- __le64 cnlb;
- __le64 wp;
-};
-
-/*
- * Check we didn't inadvertently grow the command struct
- */
-static inline void _nvme_nvm_check_size(void)
-{
- BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
- sizeof(struct nvm_chk_meta));
-}
-
-static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
- struct nvme_nvm_id12_addrf *src)
-{
- dst->ch_len = src->ch_len;
- dst->lun_len = src->lun_len;
- dst->blk_len = src->blk_len;
- dst->pg_len = src->pg_len;
- dst->pln_len = src->pln_len;
- dst->sec_len = src->sec_len;
-
- dst->ch_offset = src->ch_offset;
- dst->lun_offset = src->lun_offset;
- dst->blk_offset = src->blk_offset;
- dst->pg_offset = src->pg_offset;
- dst->pln_offset = src->pln_offset;
- dst->sec_offset = src->sec_offset;
-
- dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
- dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
- dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
- dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
- dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
- dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
-}
-
-static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
- struct nvm_geo *geo)
-{
- struct nvme_nvm_id12_grp *src;
- int sec_per_pg, sec_per_pl, pg_per_blk;
-
- if (id->cgrps != 1)
- return -EINVAL;
-
- src = &id->grp;
-
- if (src->mtype != 0) {
- pr_err("nvm: memory type not supported\n");
- return -EINVAL;
- }
-
- /* 1.2 spec. only reports a single version id - unfold */
- geo->major_ver_id = id->ver_id;
- geo->minor_ver_id = 2;
-
- /* Set compacted version for upper layers */
- geo->version = NVM_OCSSD_SPEC_12;
-
- geo->num_ch = src->num_ch;
- geo->num_lun = src->num_lun;
- geo->all_luns = geo->num_ch * geo->num_lun;
-
- geo->num_chk = le16_to_cpu(src->num_chk);
-
- geo->csecs = le16_to_cpu(src->csecs);
- geo->sos = le16_to_cpu(src->sos);
-
- pg_per_blk = le16_to_cpu(src->num_pg);
- sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
- sec_per_pl = sec_per_pg * src->num_pln;
- geo->clba = sec_per_pl * pg_per_blk;
-
- geo->all_chunks = geo->all_luns * geo->num_chk;
- geo->total_secs = geo->clba * geo->all_chunks;
-
- geo->ws_min = sec_per_pg;
- geo->ws_opt = sec_per_pg;
- geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */
-
- /* Do not impose values for maximum number of open blocks as it is
- * unspecified in 1.2. Users of 1.2 must be aware of this and eventually
- * specify these values through a quirk if restrictions apply.
- */
- geo->maxoc = geo->all_luns * geo->num_chk;
- geo->maxocpu = geo->num_chk;
-
- geo->mccap = le32_to_cpu(src->mccap);
-
- geo->trdt = le32_to_cpu(src->trdt);
- geo->trdm = le32_to_cpu(src->trdm);
- geo->tprt = le32_to_cpu(src->tprt);
- geo->tprm = le32_to_cpu(src->tprm);
- geo->tbet = le32_to_cpu(src->tbet);
- geo->tbem = le32_to_cpu(src->tbem);
-
- /* 1.2 compatibility */
- geo->vmnt = id->vmnt;
- geo->cap = le32_to_cpu(id->cap);
- geo->dom = le32_to_cpu(id->dom);
-
- geo->mtype = src->mtype;
- geo->fmtype = src->fmtype;
-
- geo->cpar = le16_to_cpu(src->cpar);
- geo->mpos = le32_to_cpu(src->mpos);
-
- geo->pln_mode = NVM_PLANE_SINGLE;
-
- if (geo->mpos & 0x020202) {
- geo->pln_mode = NVM_PLANE_DOUBLE;
- geo->ws_opt <<= 1;
- } else if (geo->mpos & 0x040404) {
- geo->pln_mode = NVM_PLANE_QUAD;
- geo->ws_opt <<= 2;
- }
-
- geo->num_pln = src->num_pln;
- geo->num_pg = le16_to_cpu(src->num_pg);
- geo->fpg_sz = le16_to_cpu(src->fpg_sz);
-
- nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);
-
- return 0;
-}
-
-static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
- struct nvme_nvm_id20_addrf *src)
-{
- dst->ch_len = src->grp_len;
- dst->lun_len = src->pu_len;
- dst->chk_len = src->chk_len;
- dst->sec_len = src->lba_len;
-
- dst->sec_offset = 0;
- dst->chk_offset = dst->sec_len;
- dst->lun_offset = dst->chk_offset + dst->chk_len;
- dst->ch_offset = dst->lun_offset + dst->lun_len;
-
- dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
- dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
- dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
- dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
-}
-
-static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
- struct nvm_geo *geo)
-{
- geo->major_ver_id = id->mjr;
- geo->minor_ver_id = id->mnr;
-
- /* Set compacted version for upper layers */
- geo->version = NVM_OCSSD_SPEC_20;
-
- geo->num_ch = le16_to_cpu(id->num_grp);
- geo->num_lun = le16_to_cpu(id->num_pu);
- geo->all_luns = geo->num_ch * geo->num_lun;
-
- geo->num_chk = le32_to_cpu(id->num_chk);
- geo->clba = le32_to_cpu(id->clba);
-
- geo->all_chunks = geo->all_luns * geo->num_chk;
- geo->total_secs = geo->clba * geo->all_chunks;
-
- geo->ws_min = le32_to_cpu(id->ws_min);
- geo->ws_opt = le32_to_cpu(id->ws_opt);
- geo->mw_cunits = le32_to_cpu(id->mw_cunits);
- geo->maxoc = le32_to_cpu(id->maxoc);
- geo->maxocpu = le32_to_cpu(id->maxocpu);
-
- geo->trdt = le32_to_cpu(id->trdt);
- geo->trdm = le32_to_cpu(id->trdm);
- geo->tprt = le32_to_cpu(id->twrt);
- geo->tprm = le32_to_cpu(id->twrm);
- geo->tbet = le32_to_cpu(id->tcrst);
- geo->tbem = le32_to_cpu(id->tcrsm);
-
- nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);
-
- return 0;
-}
-
-static int nvme_nvm_identity(struct nvm_dev *nvmdev)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_id12 *id;
- struct nvme_nvm_command c = {};
- int ret;
-
- c.identity.opcode = nvme_nvm_admin_identity;
- c.identity.nsid = cpu_to_le32(ns->head->ns_id);
-
- id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
- if (!id)
- return -ENOMEM;
-
- ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
- id, sizeof(struct nvme_nvm_id12));
- if (ret) {
- ret = -EIO;
- goto out;
- }
-
- /*
- * The 1.2 and 2.0 specifications share the first byte in their geometry
- * command to make it possible to know what version a device implements.
- */
- switch (id->ver_id) {
- case 1:
- ret = nvme_nvm_setup_12(id, &nvmdev->geo);
- break;
- case 2:
- ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
- &nvmdev->geo);
- break;
- default:
- dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
- id->ver_id);
- ret = -EINVAL;
- }
-
-out:
- kfree(id);
- return ret;
-}
-
-static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
- u8 *blks)
-{
- struct request_queue *q = nvmdev->q;
- struct nvm_geo *geo = &nvmdev->geo;
- struct nvme_ns *ns = q->queuedata;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_nvm_command c = {};
- struct nvme_nvm_bb_tbl *bb_tbl;
- int nr_blks = geo->num_chk * geo->num_pln;
- int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
- int ret = 0;
-
- c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
- c.get_bb.nsid = cpu_to_le32(ns->head->ns_id);
- c.get_bb.spba = cpu_to_le64(ppa.ppa);
-
- bb_tbl = kzalloc(tblsz, GFP_KERNEL);
- if (!bb_tbl)
- return -ENOMEM;
-
- ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
- bb_tbl, tblsz);
- if (ret) {
- dev_err(ctrl->device, "get bad block table failed (%d)\n", ret);
- ret = -EIO;
- goto out;
- }
-
- if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
- bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
- dev_err(ctrl->device, "bbt format mismatch\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (le16_to_cpu(bb_tbl->verid) != 1) {
- ret = -EINVAL;
- dev_err(ctrl->device, "bbt version not supported\n");
- goto out;
- }
-
- if (le32_to_cpu(bb_tbl->tblks) != nr_blks) {
- ret = -EINVAL;
- dev_err(ctrl->device,
- "bbt unsuspected blocks returned (%u!=%u)",
- le32_to_cpu(bb_tbl->tblks), nr_blks);
- goto out;
- }
-
- memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
-out:
- kfree(bb_tbl);
- return ret;
-}
-
-static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
- int nr_ppas, int type)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_command c = {};
- int ret = 0;
-
- c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
- c.set_bb.nsid = cpu_to_le32(ns->head->ns_id);
- c.set_bb.spba = cpu_to_le64(ppas->ppa);
- c.set_bb.nlb = cpu_to_le16(nr_ppas - 1);
- c.set_bb.value = type;
-
- ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
- NULL, 0);
- if (ret)
- dev_err(ns->ctrl->device, "set bad block table failed (%d)\n",
- ret);
- return ret;
-}
-
-/*
- * Expect the lba in device format
- */
-static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
- sector_t slba, int nchks,
- struct nvm_chk_meta *meta)
-{
- struct nvm_geo *geo = &ndev->geo;
- struct nvme_ns *ns = ndev->q->queuedata;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off;
- struct ppa_addr ppa;
- size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
- size_t log_pos, offset, len;
- int i, max_len;
- int ret = 0;
-
- /*
- * limit requests to maximum 256K to avoid issuing arbitrary large
- * requests when the device does not specific a maximum transfer size.
- */
- max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024);
-
- dev_meta = kmalloc(max_len, GFP_KERNEL);
- if (!dev_meta)
- return -ENOMEM;
-
- /* Normalize lba address space to obtain log offset */
- ppa.ppa = slba;
- ppa = dev_to_generic_addr(ndev, ppa);
-
- log_pos = ppa.m.chk;
- log_pos += ppa.m.pu * geo->num_chk;
- log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;
-
- offset = log_pos * sizeof(struct nvme_nvm_chk_meta);
-
- while (left) {
- len = min_t(unsigned int, left, max_len);
-
- memset(dev_meta, 0, max_len);
- dev_meta_off = dev_meta;
-
- ret = nvme_get_log(ctrl, ns->head->ns_id,
- NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM,
- dev_meta, len, offset);
- if (ret) {
- dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
- break;
- }
-
- for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
- meta->state = dev_meta_off->state;
- meta->type = dev_meta_off->type;
- meta->wi = dev_meta_off->wi;
- meta->slba = le64_to_cpu(dev_meta_off->slba);
- meta->cnlb = le64_to_cpu(dev_meta_off->cnlb);
- meta->wp = le64_to_cpu(dev_meta_off->wp);
-
- meta++;
- dev_meta_off++;
- }
-
- offset += len;
- left -= len;
- }
-
- kfree(dev_meta);
-
- return ret;
-}
-
-static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
- struct nvme_nvm_command *c)
-{
- c->ph_rw.opcode = rqd->opcode;
- c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
- c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
- c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
- c->ph_rw.control = cpu_to_le16(rqd->flags);
- c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
-}
-
-static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
-{
- struct nvm_rq *rqd = rq->end_io_data;
-
- rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
- rqd->error = nvme_req(rq)->status;
- nvm_end_io(rqd);
-
- kfree(nvme_req(rq)->cmd);
- blk_mq_free_request(rq);
-}
-
-static struct request *nvme_nvm_alloc_request(struct request_queue *q,
- struct nvm_rq *rqd,
- struct nvme_nvm_command *cmd)
-{
- struct nvme_ns *ns = q->queuedata;
- struct request *rq;
-
- nvme_nvm_rqtocmd(rqd, ns, cmd);
-
- rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0);
- if (IS_ERR(rq))
- return rq;
-
- rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
-
- if (rqd->bio)
- blk_rq_append_bio(rq, rqd->bio);
- else
- rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
-
- return rq;
-}
-
-static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd,
- void *buf)
-{
- struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
- struct nvme_nvm_command *cmd;
- struct request *rq;
- int ret;
-
- cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- rq = nvme_nvm_alloc_request(q, rqd, cmd);
- if (IS_ERR(rq)) {
- ret = PTR_ERR(rq);
- goto err_free_cmd;
- }
-
- if (buf) {
- ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas,
- GFP_KERNEL);
- if (ret)
- goto err_free_cmd;
- }
-
- rq->end_io_data = rqd;
-
- blk_execute_rq_nowait(NULL, rq, 0, nvme_nvm_end_io);
-
- return 0;
-
-err_free_cmd:
- kfree(cmd);
- return ret;
-}
-
-static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
- int size)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
-
- return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
-}
-
-static void nvme_nvm_destroy_dma_pool(void *pool)
-{
- struct dma_pool *dma_pool = pool;
-
- dma_pool_destroy(dma_pool);
-}
-
-static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
- gfp_t mem_flags, dma_addr_t *dma_handler)
-{
- return dma_pool_alloc(pool, mem_flags, dma_handler);
-}
-
-static void nvme_nvm_dev_dma_free(void *pool, void *addr,
- dma_addr_t dma_handler)
-{
- dma_pool_free(pool, addr, dma_handler);
-}
-
-static struct nvm_dev_ops nvme_nvm_dev_ops = {
- .identity = nvme_nvm_identity,
-
- .get_bb_tbl = nvme_nvm_get_bb_tbl,
- .set_bb_tbl = nvme_nvm_set_bb_tbl,
-
- .get_chk_meta = nvme_nvm_get_chk_meta,
-
- .submit_io = nvme_nvm_submit_io,
-
- .create_dma_pool = nvme_nvm_create_dma_pool,
- .destroy_dma_pool = nvme_nvm_destroy_dma_pool,
- .dev_dma_alloc = nvme_nvm_dev_dma_alloc,
- .dev_dma_free = nvme_nvm_dev_dma_free,
-};
-
-static int nvme_nvm_submit_user_cmd(struct request_queue *q,
- struct nvme_ns *ns,
- struct nvme_nvm_command *vcmd,
- void __user *ubuf, unsigned int bufflen,
- void __user *meta_buf, unsigned int meta_len,
- void __user *ppa_buf, unsigned int ppa_len,
- u32 *result, u64 *status, unsigned int timeout)
-{
- bool write = nvme_is_write((struct nvme_command *)vcmd);
- struct nvm_dev *dev = ns->ndev;
- struct request *rq;
- struct bio *bio = NULL;
- __le64 *ppa_list = NULL;
- dma_addr_t ppa_dma;
- __le64 *metadata = NULL;
- dma_addr_t metadata_dma;
- DECLARE_COMPLETION_ONSTACK(wait);
- int ret = 0;
-
- rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0);
- if (IS_ERR(rq)) {
- ret = -ENOMEM;
- goto err_cmd;
- }
-
- if (timeout)
- rq->timeout = timeout;
-
- if (ppa_buf && ppa_len) {
- ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
- if (!ppa_list) {
- ret = -ENOMEM;
- goto err_rq;
- }
- if (copy_from_user(ppa_list, (void __user *)ppa_buf,
- sizeof(u64) * (ppa_len + 1))) {
- ret = -EFAULT;
- goto err_ppa;
- }
- vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
- } else {
- vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
- }
-
- if (ubuf && bufflen) {
- ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
- if (ret)
- goto err_ppa;
- bio = rq->bio;
-
- if (meta_buf && meta_len) {
- metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
- &metadata_dma);
- if (!metadata) {
- ret = -ENOMEM;
- goto err_map;
- }
-
- if (write) {
- if (copy_from_user(metadata,
- (void __user *)meta_buf,
- meta_len)) {
- ret = -EFAULT;
- goto err_meta;
- }
- }
- vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
- }
-
- bio_set_dev(bio, ns->disk->part0);
- }
-
- blk_execute_rq(NULL, rq, 0);
-
- if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
- ret = -EINTR;
- else if (nvme_req(rq)->status & 0x7ff)
- ret = -EIO;
- if (result)
- *result = nvme_req(rq)->status & 0x7ff;
- if (status)
- *status = le64_to_cpu(nvme_req(rq)->result.u64);
-
- if (metadata && !ret && !write) {
- if (copy_to_user(meta_buf, (void *)metadata, meta_len))
- ret = -EFAULT;
- }
-err_meta:
- if (meta_buf && meta_len)
- dma_pool_free(dev->dma_pool, metadata, metadata_dma);
-err_map:
- if (bio)
- blk_rq_unmap_user(bio);
-err_ppa:
- if (ppa_buf && ppa_len)
- dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
-err_rq:
- blk_mq_free_request(rq);
-err_cmd:
- return ret;
-}
-
-static int nvme_nvm_submit_vio(struct nvme_ns *ns,
- struct nvm_user_vio __user *uvio)
-{
- struct nvm_user_vio vio;
- struct nvme_nvm_command c;
- unsigned int length;
- int ret;
-
- if (copy_from_user(&vio, uvio, sizeof(vio)))
- return -EFAULT;
- if (vio.flags)
- return -EINVAL;
-
- memset(&c, 0, sizeof(c));
- c.ph_rw.opcode = vio.opcode;
- c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id);
- c.ph_rw.control = cpu_to_le16(vio.control);
- c.ph_rw.length = cpu_to_le16(vio.nppas);
-
- length = (vio.nppas + 1) << ns->lba_shift;
-
- ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
- (void __user *)(uintptr_t)vio.addr, length,
- (void __user *)(uintptr_t)vio.metadata,
- vio.metadata_len,
- (void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
- &vio.result, &vio.status, 0);
-
- if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
- return -EFAULT;
-
- return ret;
-}
-
-static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
- struct nvm_passthru_vio __user *uvcmd)
-{
- struct nvm_passthru_vio vcmd;
- struct nvme_nvm_command c;
- struct request_queue *q;
- unsigned int timeout = 0;
- int ret;
-
- if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
- return -EFAULT;
- if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
- return -EACCES;
- if (vcmd.flags)
- return -EINVAL;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = vcmd.opcode;
- c.common.nsid = cpu_to_le32(ns->head->ns_id);
- c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
- c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
- /* cdw11-12 */
- c.ph_rw.length = cpu_to_le16(vcmd.nppas);
- c.ph_rw.control = cpu_to_le16(vcmd.control);
- c.common.cdw13 = cpu_to_le32(vcmd.cdw13);
- c.common.cdw14 = cpu_to_le32(vcmd.cdw14);
- c.common.cdw15 = cpu_to_le32(vcmd.cdw15);
-
- if (vcmd.timeout_ms)
- timeout = msecs_to_jiffies(vcmd.timeout_ms);
-
- q = admin ? ns->ctrl->admin_q : ns->queue;
-
- ret = nvme_nvm_submit_user_cmd(q, ns,
- (struct nvme_nvm_command *)&c,
- (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
- (void __user *)(uintptr_t)vcmd.metadata,
- vcmd.metadata_len,
- (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
- &vcmd.result, &vcmd.status, timeout);
-
- if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
- return -EFAULT;
-
- return ret;
-}
-
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp)
-{
- switch (cmd) {
- case NVME_NVM_IOCTL_ADMIN_VIO:
- return nvme_nvm_user_vcmd(ns, 1, argp);
- case NVME_NVM_IOCTL_IO_VIO:
- return nvme_nvm_user_vcmd(ns, 0, argp);
- case NVME_NVM_IOCTL_SUBMIT_VIO:
- return nvme_nvm_submit_vio(ns, argp);
- default:
- return -ENOTTY;
- }
-}
-
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
-{
- struct request_queue *q = ns->queue;
- struct nvm_dev *dev;
- struct nvm_geo *geo;
-
- _nvme_nvm_check_size();
-
- dev = nvm_alloc_dev(node);
- if (!dev)
- return -ENOMEM;
-
- /* Note that csecs and sos will be overridden if it is a 1.2 drive. */
- geo = &dev->geo;
- geo->csecs = 1 << ns->lba_shift;
- geo->sos = ns->ms;
- if (ns->features & NVME_NS_EXT_LBAS)
- geo->ext = true;
- else
- geo->ext = false;
- geo->mdts = ns->ctrl->max_hw_sectors;
-
- dev->q = q;
- memcpy(dev->name, disk_name, DISK_NAME_LEN);
- dev->ops = &nvme_nvm_dev_ops;
- dev->private_data = ns;
- ns->ndev = dev;
-
- return nvm_register(dev);
-}
-
-void nvme_nvm_unregister(struct nvme_ns *ns)
-{
- nvm_unregister(ns->ndev);
-}
-
-static ssize_t nvm_dev_attr_show(struct device *dev,
- struct device_attribute *dattr, char *page)
-{
- struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
- struct attribute *attr;
-
- if (!ndev)
- return 0;
-
- attr = &dattr->attr;
-
- if (strcmp(attr->name, "version") == 0) {
- if (geo->major_ver_id == 1)
- return scnprintf(page, PAGE_SIZE, "%u\n",
- geo->major_ver_id);
- else
- return scnprintf(page, PAGE_SIZE, "%u.%u\n",
- geo->major_ver_id,
- geo->minor_ver_id);
- } else if (strcmp(attr->name, "capabilities") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
- } else if (strcmp(attr->name, "read_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
- } else if (strcmp(attr->name, "read_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
- } else {
- return scnprintf(page,
- PAGE_SIZE,
- "Unhandled attr(%s) in `%s`\n",
- attr->name, __func__);
- }
-}
-
-static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
-{
- return scnprintf(page, PAGE_SIZE,
- "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- ppaf->ch_offset, ppaf->ch_len,
- ppaf->lun_offset, ppaf->lun_len,
- ppaf->pln_offset, ppaf->pln_len,
- ppaf->blk_offset, ppaf->blk_len,
- ppaf->pg_offset, ppaf->pg_len,
- ppaf->sec_offset, ppaf->sec_len);
-}
-
-static ssize_t nvm_dev_attr_show_12(struct device *dev,
- struct device_attribute *dattr, char *page)
-{
- struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
- struct attribute *attr;
-
- if (!ndev)
- return 0;
-
- attr = &dattr->attr;
-
- if (strcmp(attr->name, "vendor_opcode") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
- } else if (strcmp(attr->name, "device_mode") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
- /* kept for compatibility */
- } else if (strcmp(attr->name, "media_manager") == 0) {
- return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
- } else if (strcmp(attr->name, "ppa_format") == 0) {
- return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
- } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
- } else if (strcmp(attr->name, "flash_media_type") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
- } else if (strcmp(attr->name, "num_channels") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
- } else if (strcmp(attr->name, "num_luns") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
- } else if (strcmp(attr->name, "num_planes") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
- } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
- } else if (strcmp(attr->name, "num_pages") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
- } else if (strcmp(attr->name, "page_size") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
- } else if (strcmp(attr->name, "hw_sector_size") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
- } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
- } else if (strcmp(attr->name, "prog_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
- } else if (strcmp(attr->name, "prog_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
- } else if (strcmp(attr->name, "erase_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
- } else if (strcmp(attr->name, "erase_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
- } else if (strcmp(attr->name, "multiplane_modes") == 0) {
- return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
- } else if (strcmp(attr->name, "media_capabilities") == 0) {
- return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
- } else if (strcmp(attr->name, "max_phys_secs") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
- } else {
- return scnprintf(page, PAGE_SIZE,
- "Unhandled attr(%s) in `%s`\n",
- attr->name, __func__);
- }
-}
-
-static ssize_t nvm_dev_attr_show_20(struct device *dev,
- struct device_attribute *dattr, char *page)
-{
- struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
- struct attribute *attr;
-
- if (!ndev)
- return 0;
-
- attr = &dattr->attr;
-
- if (strcmp(attr->name, "groups") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
- } else if (strcmp(attr->name, "punits") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
- } else if (strcmp(attr->name, "chunks") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
- } else if (strcmp(attr->name, "clba") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
- } else if (strcmp(attr->name, "ws_min") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
- } else if (strcmp(attr->name, "ws_opt") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
- } else if (strcmp(attr->name, "maxoc") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
- } else if (strcmp(attr->name, "maxocpu") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
- } else if (strcmp(attr->name, "mw_cunits") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
- } else if (strcmp(attr->name, "write_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
- } else if (strcmp(attr->name, "write_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
- } else if (strcmp(attr->name, "reset_typ") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
- } else if (strcmp(attr->name, "reset_max") == 0) {
- return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
- } else {
- return scnprintf(page, PAGE_SIZE,
- "Unhandled attr(%s) in `%s`\n",
- attr->name, __func__);
- }
-}
-
-#define NVM_DEV_ATTR_RO(_name) \
- DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
-#define NVM_DEV_ATTR_12_RO(_name) \
- DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
-#define NVM_DEV_ATTR_20_RO(_name) \
- DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)
-
-/* general attributes */
-static NVM_DEV_ATTR_RO(version);
-static NVM_DEV_ATTR_RO(capabilities);
-
-static NVM_DEV_ATTR_RO(read_typ);
-static NVM_DEV_ATTR_RO(read_max);
-
-/* 1.2 values */
-static NVM_DEV_ATTR_12_RO(vendor_opcode);
-static NVM_DEV_ATTR_12_RO(device_mode);
-static NVM_DEV_ATTR_12_RO(ppa_format);
-static NVM_DEV_ATTR_12_RO(media_manager);
-static NVM_DEV_ATTR_12_RO(media_type);
-static NVM_DEV_ATTR_12_RO(flash_media_type);
-static NVM_DEV_ATTR_12_RO(num_channels);
-static NVM_DEV_ATTR_12_RO(num_luns);
-static NVM_DEV_ATTR_12_RO(num_planes);
-static NVM_DEV_ATTR_12_RO(num_blocks);
-static NVM_DEV_ATTR_12_RO(num_pages);
-static NVM_DEV_ATTR_12_RO(page_size);
-static NVM_DEV_ATTR_12_RO(hw_sector_size);
-static NVM_DEV_ATTR_12_RO(oob_sector_size);
-static NVM_DEV_ATTR_12_RO(prog_typ);
-static NVM_DEV_ATTR_12_RO(prog_max);
-static NVM_DEV_ATTR_12_RO(erase_typ);
-static NVM_DEV_ATTR_12_RO(erase_max);
-static NVM_DEV_ATTR_12_RO(multiplane_modes);
-static NVM_DEV_ATTR_12_RO(media_capabilities);
-static NVM_DEV_ATTR_12_RO(max_phys_secs);
-
-/* 2.0 values */
-static NVM_DEV_ATTR_20_RO(groups);
-static NVM_DEV_ATTR_20_RO(punits);
-static NVM_DEV_ATTR_20_RO(chunks);
-static NVM_DEV_ATTR_20_RO(clba);
-static NVM_DEV_ATTR_20_RO(ws_min);
-static NVM_DEV_ATTR_20_RO(ws_opt);
-static NVM_DEV_ATTR_20_RO(maxoc);
-static NVM_DEV_ATTR_20_RO(maxocpu);
-static NVM_DEV_ATTR_20_RO(mw_cunits);
-static NVM_DEV_ATTR_20_RO(write_typ);
-static NVM_DEV_ATTR_20_RO(write_max);
-static NVM_DEV_ATTR_20_RO(reset_typ);
-static NVM_DEV_ATTR_20_RO(reset_max);
-
-static struct attribute *nvm_dev_attrs[] = {
- /* version agnostic attrs */
- &dev_attr_version.attr,
- &dev_attr_capabilities.attr,
- &dev_attr_read_typ.attr,
- &dev_attr_read_max.attr,
-
- /* 1.2 attrs */
- &dev_attr_vendor_opcode.attr,
- &dev_attr_device_mode.attr,
- &dev_attr_media_manager.attr,
- &dev_attr_ppa_format.attr,
- &dev_attr_media_type.attr,
- &dev_attr_flash_media_type.attr,
- &dev_attr_num_channels.attr,
- &dev_attr_num_luns.attr,
- &dev_attr_num_planes.attr,
- &dev_attr_num_blocks.attr,
- &dev_attr_num_pages.attr,
- &dev_attr_page_size.attr,
- &dev_attr_hw_sector_size.attr,
- &dev_attr_oob_sector_size.attr,
- &dev_attr_prog_typ.attr,
- &dev_attr_prog_max.attr,
- &dev_attr_erase_typ.attr,
- &dev_attr_erase_max.attr,
- &dev_attr_multiplane_modes.attr,
- &dev_attr_media_capabilities.attr,
- &dev_attr_max_phys_secs.attr,
-
- /* 2.0 attrs */
- &dev_attr_groups.attr,
- &dev_attr_punits.attr,
- &dev_attr_chunks.attr,
- &dev_attr_clba.attr,
- &dev_attr_ws_min.attr,
- &dev_attr_ws_opt.attr,
- &dev_attr_maxoc.attr,
- &dev_attr_maxocpu.attr,
- &dev_attr_mw_cunits.attr,
-
- &dev_attr_write_typ.attr,
- &dev_attr_write_max.attr,
- &dev_attr_reset_typ.attr,
- &dev_attr_reset_max.attr,
-
- NULL,
-};
-
-static umode_t nvm_dev_attrs_visible(struct kobject *kobj,
- struct attribute *attr, int index)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct gendisk *disk = dev_to_disk(dev);
- struct nvme_ns *ns = disk->private_data;
- struct nvm_dev *ndev = ns->ndev;
- struct device_attribute *dev_attr =
- container_of(attr, typeof(*dev_attr), attr);
-
- if (!ndev)
- return 0;
-
- if (dev_attr->show == nvm_dev_attr_show)
- return attr->mode;
-
- switch (ndev->geo.major_ver_id) {
- case 1:
- if (dev_attr->show == nvm_dev_attr_show_12)
- return attr->mode;
- break;
- case 2:
- if (dev_attr->show == nvm_dev_attr_show_20)
- return attr->mode;
- break;
- }
-
- return 0;
-}
-
-const struct attribute_group nvme_nvm_attr_group = {
- .name = "lightnvm",
- .attrs = nvm_dev_attrs,
- .is_visible = nvm_dev_attrs_visible,
-};
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5cd1fa3b8464..a2e1f298b217 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -11,7 +11,6 @@
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
-#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
@@ -48,11 +47,6 @@ extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
-enum {
- NVME_NS_LBA = 0,
- NVME_NS_LIGHTNVM = 1,
-};
-
/*
* List of workarounds for devices that required behavior not specified in
* the standard.
@@ -93,11 +87,6 @@ enum nvme_quirks {
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
/*
- * Supports the LighNVM command set if indicated in vs[1].
- */
- NVME_QUIRK_LIGHTNVM = (1 << 6),
-
- /*
* Set MEDIUM priority on SQ creation
*/
NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7),
@@ -158,6 +147,7 @@ enum nvme_quirks {
struct nvme_request {
struct nvme_command *cmd;
union nvme_result result;
+ u8 genctr;
u8 retries;
u8 flags;
u16 status;
@@ -449,7 +439,6 @@ struct nvme_ns {
u32 ana_grpid;
#endif
struct list_head siblings;
- struct nvm_dev *ndev;
struct kref kref;
struct nvme_ns_head *head;
@@ -497,6 +486,49 @@ struct nvme_ctrl_ops {
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
+/*
+ * nvme command_id is constructed as such:
+ * | xxxx | xxxxxxxxxxxx |
+ * gen request tag
+ */
+#define nvme_genctr_mask(gen) (gen & 0xf)
+#define nvme_cid_install_genctr(gen) (nvme_genctr_mask(gen) << 12)
+#define nvme_genctr_from_cid(cid) ((cid & 0xf000) >> 12)
+#define nvme_tag_from_cid(cid) (cid & 0xfff)
+
+static inline u16 nvme_cid(struct request *rq)
+{
+ return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
+}
+
+static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
+ u16 command_id)
+{
+ u8 genctr = nvme_genctr_from_cid(command_id);
+ u16 tag = nvme_tag_from_cid(command_id);
+ struct request *rq;
+
+ rq = blk_mq_tag_to_rq(tags, tag);
+ if (unlikely(!rq)) {
+ pr_err("could not locate request for tag %#x\n",
+ tag);
+ return NULL;
+ }
+ if (unlikely(nvme_genctr_mask(nvme_req(rq)->genctr) != genctr)) {
+ dev_err(nvme_req(rq)->ctrl->device,
+ "request %#x genctr mismatch (got %#x expected %#x)\n",
+ tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
+ return NULL;
+ }
+ return rq;
+}
+
+static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
+ u16 command_id)
+{
+ return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
+}
+
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
const char *dev_name);
@@ -594,7 +626,8 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
{
- return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+ return !qid &&
+ nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
}
void nvme_complete_rq(struct request *req);
@@ -823,26 +856,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
}
#endif
-#ifdef CONFIG_NVM
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
-void nvme_nvm_unregister(struct nvme_ns *ns);
-extern const struct attribute_group nvme_nvm_attr_group;
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp);
-#else
-static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
- int node)
-{
- return 0;
-}
-
-static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
-static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp)
-{
- return -ENOTTY;
-}
-#endif /* CONFIG_NVM */
-
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
return dev_to_disk(dev)->private_data;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 51852085239e..b82492cd7503 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -60,6 +60,8 @@ MODULE_PARM_DESC(sgl_threshold,
"Use SGLs when average request segment size is larger or equal to "
"this size. Use 0 to disable SGLs.");
+#define NVME_PCI_MIN_QUEUE_SIZE 2
+#define NVME_PCI_MAX_QUEUE_SIZE 4095
static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = {
.set = io_queue_depth_set,
@@ -68,7 +70,7 @@ static const struct kernel_param_ops io_queue_depth_ops = {
static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
-MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
@@ -135,6 +137,7 @@ struct nvme_dev {
u32 cmbloc;
struct nvme_ctrl ctrl;
u32 last_ps;
+ bool hmb;
mempool_t *iod_mempool;
@@ -153,18 +156,14 @@ struct nvme_dev {
unsigned int nr_allocated_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
+
+ bool attrs_added;
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
{
- int ret;
- u32 n;
-
- ret = kstrtou32(val, 10, &n);
- if (ret != 0 || n < 2)
- return -EINVAL;
-
- return param_set_uint(val, kp);
+ return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
+ NVME_PCI_MAX_QUEUE_SIZE);
}
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -1014,7 +1013,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
return;
}
- req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id);
+ req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
if (unlikely(!req)) {
dev_warn(nvmeq->dev->ctrl.device,
"invalid id %d completed on queue %d\n",
@@ -1808,17 +1807,6 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
return ret >= 0 ? 0 : ret;
}
-static ssize_t nvme_cmb_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
-
- return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n",
- ndev->cmbloc, ndev->cmbsz);
-}
-static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
-
static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
{
u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
@@ -1887,20 +1875,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
pci_p2pmem_publish(pdev, true);
-
- if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL))
- dev_warn(dev->ctrl.device,
- "failed to add sysfs attribute for CMB\n");
-}
-
-static inline void nvme_release_cmb(struct nvme_dev *dev)
-{
- if (dev->cmb_size) {
- sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
- &dev_attr_cmb.attr, NULL);
- dev->cmb_size = 0;
- }
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -1923,7 +1897,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
dev_warn(dev->ctrl.device,
"failed to set host mem (err %d, flags %#x).\n",
ret, bits);
- }
+ } else
+ dev->hmb = bits & NVME_HOST_MEM_ENABLE;
+
return ret;
}
@@ -2080,6 +2056,102 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret;
}
+static ssize_t cmb_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "cmbloc : x%08x\ncmbsz : x%08x\n",
+ ndev->cmbloc, ndev->cmbsz);
+}
+static DEVICE_ATTR_RO(cmb);
+
+static ssize_t cmbloc_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%u\n", ndev->cmbloc);
+}
+static DEVICE_ATTR_RO(cmbloc);
+
+static ssize_t cmbsz_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%u\n", ndev->cmbsz);
+}
+static DEVICE_ATTR_RO(cmbsz);
+
+static ssize_t hmb_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return sysfs_emit(buf, "%d\n", ndev->hmb);
+}
+
+static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+ bool new;
+ int ret;
+
+ if (strtobool(buf, &new) < 0)
+ return -EINVAL;
+
+ if (new == ndev->hmb)
+ return count;
+
+ if (new) {
+ ret = nvme_setup_host_mem(ndev);
+ } else {
+ ret = nvme_set_host_mem(ndev, 0);
+ if (!ret)
+ nvme_free_host_mem(ndev);
+ }
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR_RW(hmb);
+
+static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct nvme_ctrl *ctrl =
+ dev_get_drvdata(container_of(kobj, struct device, kobj));
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+
+ if (a == &dev_attr_cmb.attr ||
+ a == &dev_attr_cmbloc.attr ||
+ a == &dev_attr_cmbsz.attr) {
+ if (!dev->cmbsz)
+ return 0;
+ }
+ if (a == &dev_attr_hmb.attr && !ctrl->hmpre)
+ return 0;
+
+ return a->mode;
+}
+
+static struct attribute *nvme_pci_attrs[] = {
+ &dev_attr_cmb.attr,
+ &dev_attr_cmbloc.attr,
+ &dev_attr_cmbsz.attr,
+ &dev_attr_hmb.attr,
+ NULL,
+};
+
+static const struct attribute_group nvme_pci_attr_group = {
+ .attrs = nvme_pci_attrs,
+ .is_visible = nvme_pci_attrs_are_visible,
+};
+
/*
* nirqs is the number of interrupts available for write and read
* queues. The core already reserved an interrupt for the admin queue.
@@ -2751,6 +2823,10 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
+ if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
+ &nvme_pci_attr_group))
+ dev->attrs_added = true;
+
nvme_start_ctrl(&dev->ctrl);
return;
@@ -2999,6 +3075,13 @@ static void nvme_shutdown(struct pci_dev *pdev)
nvme_disable_prepare_reset(dev, true);
}
+static void nvme_remove_attrs(struct nvme_dev *dev)
+{
+ if (dev->attrs_added)
+ sysfs_remove_group(&dev->ctrl.device->kobj,
+ &nvme_pci_attr_group);
+}
+
/*
* The driver's remove may be called on a device in a partially initialized
* state. This function must not have any dependencies on the device state in
@@ -3020,7 +3103,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
- nvme_release_cmb(dev);
+ nvme_remove_attrs(dev);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
@@ -3047,8 +3130,13 @@ static int nvme_resume(struct device *dev)
if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
- return nvme_try_sched_reset(&ndev->ctrl);
+ goto reset;
+ if (ctrl->hmpre && nvme_setup_host_mem(ndev))
+ goto reset;
+
return 0;
+reset:
+ return nvme_try_sched_reset(ctrl);
}
static int nvme_suspend(struct device *dev)
@@ -3072,15 +3160,9 @@ static int nvme_suspend(struct device *dev)
* the PCI bus layer to put it into D3 in order to take the PCIe link
* down, so as to allow the platform to achieve its minimum low-power
* state (which may not be possible if the link is up).
- *
- * If a host memory buffer is enabled, shut down the device as the NVMe
- * specification allows the device to access the host memory buffer in
- * host DRAM from all power states, but hosts will fail access to DRAM
- * during S3.
*/
if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev) ||
- ndev->nr_host_mem_descs ||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
return nvme_disable_prepare_reset(ndev, true);
@@ -3091,6 +3173,17 @@ static int nvme_suspend(struct device *dev)
if (ctrl->state != NVME_CTRL_LIVE)
goto unfreeze;
+ /*
+ * Host memory access may not be successful in a system suspend state,
+ * but the specification allows the controller to access memory in a
+ * non-operational power state.
+ */
+ if (ndev->hmb) {
+ ret = nvme_set_host_mem(ndev, 0);
+ if (ret < 0)
+ goto unfreeze;
+ }
+
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
if (ret < 0)
goto unfreeze;
@@ -3243,12 +3336,6 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
- { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
- .driver_data = NVME_QUIRK_LIGHTNVM, },
- { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
- .driver_data = NVME_QUIRK_LIGHTNVM, },
- { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
- .driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 7f6b3a991501..a68704e39084 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -735,13 +735,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
if (ret)
return ret;
- ctrl->ctrl.queue_count = nr_io_queues + 1;
- if (ctrl->ctrl.queue_count < 2) {
+ if (nr_io_queues == 0) {
dev_err(ctrl->ctrl.device,
"unable to set any I/O queues\n");
return -ENOMEM;
}
+ ctrl->ctrl.queue_count = nr_io_queues + 1;
dev_info(ctrl->ctrl.device,
"creating %d I/O queues.\n", nr_io_queues);
@@ -1730,10 +1730,10 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
struct request *rq;
struct nvme_rdma_request *req;
- rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on QP %#x not found\n",
+ "got bad command_id %#x on QP %#x\n",
cqe->command_id, queue->qp->qp_num);
nvme_rdma_error_recovery(queue->ctrl);
return;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8cb15ee5b249..645025620154 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -487,11 +487,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
{
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag 0x%x not found\n",
- nvme_tcp_queue_id(queue), cqe->command_id);
+ "got bad cqe.command_id %#x on queue %d\n",
+ cqe->command_id, nvme_tcp_queue_id(queue));
nvme_tcp_error_recovery(&queue->ctrl->ctrl);
return -EINVAL;
}
@@ -508,11 +508,11 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
{
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ "got bad c2hdata.command_id %#x on queue %d\n",
+ pdu->command_id, nvme_tcp_queue_id(queue));
return -ENOENT;
}
@@ -606,7 +606,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->hdr.plen =
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
data->ttag = pdu->ttag;
- data->command_id = rq->tag;
+ data->command_id = nvme_cid(rq);
data->data_offset = cpu_to_le32(req->data_sent);
data->data_length = cpu_to_le32(req->pdu_len);
return 0;
@@ -619,11 +619,11 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
struct request *rq;
int ret;
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
+ "got bad r2t.command_id %#x on queue %d\n",
+ pdu->command_id, nvme_tcp_queue_id(queue));
return -ENOENT;
}
req = blk_mq_rq_to_pdu(rq);
@@ -702,17 +702,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
unsigned int *offset, size_t *len)
{
struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
- struct nvme_tcp_request *req;
- struct request *rq;
-
- rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
- if (!rq) {
- dev_err(queue->ctrl->ctrl.device,
- "queue %d tag %#x not found\n",
- nvme_tcp_queue_id(queue), pdu->command_id);
- return -ENOENT;
- }
- req = blk_mq_rq_to_pdu(rq);
+ struct request *rq =
+ nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
while (true) {
int recv_len, ret;
@@ -804,8 +796,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
}
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
- pdu->command_id);
+ struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
@@ -1228,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
sock_release(queue->sock);
kfree(queue->pdu);
+ mutex_destroy(&queue->send_mutex);
mutex_destroy(&queue->queue_lock);
}
@@ -1533,6 +1526,7 @@ err_sock:
sock_release(queue->sock);
queue->sock = NULL;
err_destroy_mutex:
+ mutex_destroy(&queue->send_mutex);
mutex_destroy(&queue->queue_lock);
return ret;
}
@@ -1769,13 +1763,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
if (ret)
return ret;
- ctrl->queue_count = nr_io_queues + 1;
- if (ctrl->queue_count < 2) {
+ if (nr_io_queues == 0) {
dev_err(ctrl->device,
"unable to set any I/O queues\n");
return -ENOMEM;
}
+ ctrl->queue_count = nr_io_queues + 1;
dev_info(ctrl->device,
"creating %d I/O queues.\n", nr_io_queues);
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 6543015b6121..2a89c5aa0790 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -72,6 +72,20 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
return ret;
}
+static const char *nvme_trace_admin_set_features(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 fid = cdw10[0];
+ u8 sv = cdw10[3] & 0x8;
+ u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+ trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
static const char *nvme_trace_admin_get_features(struct trace_seq *p,
u8 *cdw10)
{
@@ -80,7 +94,7 @@ static const char *nvme_trace_admin_get_features(struct trace_seq *p,
u8 sel = cdw10[1] & 0x7;
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
- trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+ trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
trace_seq_putc(p, 0);
return ret;
@@ -201,6 +215,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
return nvme_trace_create_cq(p, cdw10);
case nvme_admin_identify:
return nvme_trace_admin_identify(p, cdw10);
+ case nvme_admin_set_features:
+ return nvme_trace_admin_set_features(p, cdw10);
case nvme_admin_get_features:
return nvme_trace_admin_get_features(p, cdw10);
case nvme_admin_get_lba_status:
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 4be2ececbc45..973561c93888 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -31,7 +31,6 @@ config NVME_TARGET_PASSTHRU
config NVME_TARGET_LOOP
tristate "NVMe loopback device support"
depends on NVME_TARGET
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
help
@@ -65,7 +64,6 @@ config NVME_TARGET_FC
config NVME_TARGET_FCLOOP
tristate "NVMe over Fabrics FC Transport Loopback Test driver"
depends on NVME_TARGET
- select NVME_CORE
select NVME_FABRICS
select SG_POOL
depends on NVME_FC
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ac7210a3ea1c..66d05eecc2a9 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -802,6 +802,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* controller teardown as a result of a keep-alive expiration.
*/
ctrl->reset_tbkas = true;
+ sq->ctrl->sqs[sq->qid] = NULL;
nvmet_ctrl_put(ctrl);
sq->ctrl = NULL; /* allows reusing the queue later */
}
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 7d0f3523fdab..7d0454cee920 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -109,21 +109,38 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 sqsize = le16_to_cpu(c->sqsize);
struct nvmet_ctrl *old;
+ u16 mqes = NVME_CAP_MQES(ctrl->cap);
u16 ret;
- old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
- if (old) {
- pr_warn("queue already connected!\n");
- req->error_loc = offsetof(struct nvmf_connect_command, opcode);
- return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
- }
if (!sqsize) {
pr_warn("queue size zero!\n");
req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
goto err;
}
+ if (ctrl->sqs[qid] != NULL) {
+ pr_warn("qid %u has already been created\n", qid);
+ req->error_loc = offsetof(struct nvmf_connect_command, qid);
+ return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+ }
+
+ if (sqsize > mqes) {
+ pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n",
+ sqsize, mqes, ctrl->cntlid);
+ req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+ req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
+ return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+ }
+
+ old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
+ if (old) {
+ pr_warn("queue already connected!\n");
+ req->error_loc = offsetof(struct nvmf_connect_command, opcode);
+ return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+ }
+
/* note: convert queue size from 0's-based value to 1's-based value */
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
@@ -138,6 +155,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
if (ret) {
pr_err("failed to install queue %d cntlid %d ret %x\n",
qid, ctrl->cntlid, ret);
+ ctrl->sqs[qid] = NULL;
goto err;
}
}
@@ -260,11 +278,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
}
status = nvmet_install_queue(ctrl, req);
- if (status) {
- /* pass back cntlid that had the issue of installing queue */
- req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
+ if (status)
goto out_ctrl_put;
- }
+
+ /* pass back cntlid for successful completion */
+ req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 3a17a7e26bbf..0285ccc7541f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -107,10 +107,10 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
} else {
struct request *rq;
- rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+ rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
- "tag 0x%x on queue %d not found\n",
+ "got bad command_id %#x on queue %d\n",
cqe->command_id, nvme_loop_queue_idx(queue));
return;
}
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index 1373a3c67962..bff454d46255 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -27,7 +27,7 @@ static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
u8 sel = cdw10[1] & 0x7;
u32 cdw11 = get_unaligned_le32(cdw10 + 4);
- trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+ trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
trace_seq_putc(p, 0);
return ret;
@@ -49,6 +49,20 @@ static const char *nvmet_trace_get_lba_status(struct trace_seq *p,
return ret;
}
+static const char *nvmet_trace_admin_set_features(struct trace_seq *p,
+ u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 fid = cdw10[0];
+ u8 sv = cdw10[3] & 0x8;
+ u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+ trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -94,6 +108,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
switch (opcode) {
case nvme_admin_identify:
return nvmet_trace_admin_identify(p, cdw10);
+ case nvme_admin_set_features:
+ return nvmet_trace_admin_set_features(p, cdw10);
case nvme_admin_get_features:
return nvmet_trace_admin_get_features(p, cdw10);
case nvme_admin_get_lba_status:
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 17f8b7a45f21..46bc30fe85d2 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -115,14 +115,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
}
status = nvmet_req_find_ns(req);
- if (status) {
- status = NVME_SC_INTERNAL;
+ if (status)
goto done;
- }
if (!bdev_is_zoned(req->ns->bdev)) {
req->error_loc = offsetof(struct nvme_identify, nsid);
- status = NVME_SC_INVALID_NS | NVME_SC_DNR;
goto done;
}