author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-31 04:01:46 +0200
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-31 04:01:46 +0200
commit | 9a1d6c9e3f53732f2f48f4424e028642db616663 (patch)
tree | 5308c856a8b7a56b705cde940eb946ee88e1ca88 /drivers
parent | Merge tag 'for-5.15/block-2021-08-30' of git://git.kernel.dk/linux-block (diff)
parent | Merge tag 'floppy-for-5.15' of https://github.com/evdenis/linux-floppy into f... (diff)
download | linux-9a1d6c9e3f53732f2f48f4424e028642db616663.tar.xz linux-9a1d6c9e3f53732f2f48f4424e028642db616663.zip
Merge tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"Sitting on top of the core block changes, here are the driver changes
for the 5.15 merge window:
- NVMe updates via Christoph:
- suspend improvements for devices with an HMB (Keith Busch)
- handle double completions more gracefully (Sagi Grimberg)
- cleanup the selects for the nvme core code a bit (Sagi Grimberg)
- don't update queue count when failing to set io queues (Ruozhu Li)
- various nvmet connect fixes (Amit Engel)
- cleanup lightnvm leftovers (Keith Busch, me)
- small cleanups (Colin Ian King, Hou Pu)
- add tracing for the Set Features command (Hou Pu)
- CMB sysfs cleanups (Keith Busch)
- add a mutex_destroy call (Keith Busch)
- remove lightnvm subsystem. It's served its purpose and ultimately
led to zoned nvme support; we no longer need it (Christoph)
- revert floppy O_NDELAY fix (Denis)
- nbd fixes (Hou, Pavel, Baokun)
- nbd locking fixes (Tetsuo)
- nbd device removal fixes (Christoph)
- raid10 rcu warning fix (Xiao)
- raid1 write behind fix (Guoqing)
- rnbd fixes (Gioh, Md Haris)
- misc fixes (Colin)"
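
One detail behind the "nbd device removal fixes" above: the final put of an nbd device can happen in a context that del_gendisk() would block on, so the driver now defers the actual teardown to a dedicated workqueue instead of doing it synchronously. The pattern, condensed from the nbd.c hunks in the diff below (trimmed to the shape of the fix, not the complete driver):

	/* Condensed from the nbd.c diff in this merge: removal of
	 * netlink-created devices is punted to nbd_del_wq so that the
	 * context dropping the last reference never calls del_gendisk()
	 * directly. */
	static struct workqueue_struct *nbd_del_wq;	/* allocated in nbd_init() */

	static void nbd_dev_remove_work(struct work_struct *work)
	{
		nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
	}

	static void nbd_put(struct nbd_device *nbd)
	{
		if (!refcount_dec_and_test(&nbd->refs))
			return;

		/* Call del_gendisk() asynchronously to prevent deadlock */
		if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
			queue_work(nbd_del_wq, &nbd->remove_work);
		else
			nbd_dev_remove(nbd);
	}

nbd_cleanup() then destroys the workqueue, which also waits for any pending nbd_dev_remove_work() to finish.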
* tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block: (42 commits)
Revert "floppy: reintroduce O_NDELAY fix"
raid1: ensure write behind bio has less than BIO_MAX_VECS sectors
md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
nbd: remove nbd->destroy_complete
nbd: only return usable devices from nbd_find_unused
nbd: set nbd->index before releasing nbd_index_mutex
nbd: prevent IDR lookups from finding partially initialized devices
nbd: reset NBD to NULL when restarting in nbd_genl_connect
nbd: add missing locking to the nbd_dev_add error path
nvme: remove the unused NVME_NS_* enum
nvme: remove nvm_ndev from ns
nvme: Have NVME_FABRICS select NVME_CORE instead of transport drivers
block: nbd: add sanity check for first_minor
nvmet: check that host sqsize does not exceed ctrl MQES
nvmet: avoid duplicate qid in connect cmd
nvmet: pass back cntlid on successful completion
nvme-rdma: don't update queue count when failing to set io queues
nvme-tcp: don't update queue count when failing to set io queues
nvme-tcp: pair send_mutex init with destroy
nvme: allow user toggling hmb usage
...
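
The commit "nbd: prevent IDR lookups from finding partially initialized devices" works by keeping a new device's reference count at zero until add_disk() has completed, so concurrent IDR lookups that use refcount_inc_not_zero() simply skip it; only then is the count set to its real value. A minimal userspace model of that publish pattern, using C11 atomics in place of the kernel's refcount_t (ref_get_unless_zero() here stands in for refcount_inc_not_zero(); everything else is illustrative, not kernel code):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct dev {
		atomic_int refs;
	};

	/* Take a reference only if the object is already live (refs > 0),
	 * mirroring the kernel's refcount_inc_not_zero(). */
	static bool ref_get_unless_zero(struct dev *d)
	{
		int old = atomic_load(&d->refs);

		while (old != 0)
			if (atomic_compare_exchange_weak(&d->refs, &old, old + 1))
				return true;
		return false;
	}

	int main(void)
	{
		struct dev d;

		atomic_init(&d.refs, 0);	/* under construction: lookups must skip us */
		printf("lookup during init: %d\n", ref_get_unless_zero(&d));	/* 0 */

		atomic_store(&d.refs, 2);	/* publish: one ref for the table, one for the caller */
		printf("lookup after publish: %d\n", ref_get_unless_zero(&d));	/* 1 */
		printf("refs now: %d\n", atomic_load(&d.refs));			/* 3 */
		return 0;
	}

This is why nbd_dev_add() in the diff below takes a refs argument: netlink callers pass 2 (IDR entry plus their own reference), module init passes 1.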
Diffstat (limited to 'drivers')
43 files changed, 442 insertions, 12955 deletions
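
Among the changes below, the NBD_SET_SIZE_BLOCKS ioctl now rejects block counts whose byte size would overflow, using check_mul_overflow() instead of a raw multiplication. A small standalone sketch of the same check, using the compiler builtin that the kernel helper wraps on current toolchains (the values and the set_size_blocks() wrapper are made up for the demonstration):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool set_size_blocks(uint64_t blocks, uint64_t blksize)
	{
		uint64_t bytesize;

		/* Reject the request instead of silently wrapping around;
		 * the kernel returns -EINVAL at the equivalent point. */
		if (__builtin_mul_overflow(blocks, blksize, &bytesize))
			return false;

		printf("new size: %llu bytes\n", (unsigned long long)bytesize);
		return true;
	}

	int main(void)
	{
		set_size_blocks(1024, 4096);		/* fine: 4 MiB */
		set_size_blocks(UINT64_MAX / 2, 4096);	/* rejected: would wrap */
		return 0;
	}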
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 8bad63417a50..30d2db37cc87 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
 
 source "drivers/isdn/Kconfig"
 
-source "drivers/lightnvm/Kconfig"
-
 # input before char - char/joystick depends on it. As does USB.
 
 source "drivers/input/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 27c018bdf4de..be5d40ae1488 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
 obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
 
 obj-$(CONFIG_PARPORT) += parport/
-obj-$(CONFIG_NVM) += lightnvm/
 obj-y += base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM) += nvdimm/
 obj-$(CONFIG_DAX) += dax/
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 87460e0e5c72..fef79ea52e3e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4029,23 +4029,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
 	if (fdc_state[FDC(drive)].rawcmd == 1)
 		fdc_state[FDC(drive)].rawcmd = 2;
 
-	if (mode & (FMODE_READ|FMODE_WRITE)) {
-		drive_state[drive].last_checked = 0;
-		clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags);
-		if (bdev_check_media_change(bdev))
-			floppy_revalidate(bdev->bd_disk);
-		if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
-			goto out;
-		if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
+	if (!(mode & FMODE_NDELAY)) {
+		if (mode & (FMODE_READ|FMODE_WRITE)) {
+			drive_state[drive].last_checked = 0;
+			clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
+				  &drive_state[drive].flags);
+			if (bdev_check_media_change(bdev))
+				floppy_revalidate(bdev->bd_disk);
+			if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
+				goto out;
+			if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
+				goto out;
+		}
+		res = -EROFS;
+		if ((mode & FMODE_WRITE) &&
+		    !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
 			goto out;
 	}
-
-	res = -EROFS;
-
-	if ((mode & FMODE_WRITE) &&
-	    !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
-		goto out;
-
 	mutex_unlock(&open_lock);
 	mutex_unlock(&floppy_mutex);
 	return 0;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 19f5d5a8b16a..5170a630778d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -49,6 +49,7 @@
 static DEFINE_IDR(nbd_index_idr);
 static DEFINE_MUTEX(nbd_index_mutex);
+static struct workqueue_struct *nbd_del_wq;
 static int nbd_total_devices = 0;
 
 struct nbd_sock {
@@ -113,12 +114,12 @@ struct nbd_device {
 	struct mutex config_lock;
 	struct gendisk *disk;
 	struct workqueue_struct *recv_workq;
+	struct work_struct remove_work;
 
 	struct list_head list;
 	struct task_struct *task_recv;
 	struct task_struct *task_setup;
 
-	struct completion *destroy_complete;
 	unsigned long flags;
 
 	char *backend;
@@ -237,32 +238,36 @@ static void nbd_dev_remove(struct nbd_device *nbd)
 {
 	struct gendisk *disk = nbd->disk;
 
-	if (disk) {
-		del_gendisk(disk);
-		blk_cleanup_disk(disk);
-		blk_mq_free_tag_set(&nbd->tag_set);
-	}
+	del_gendisk(disk);
+	blk_cleanup_disk(disk);
+	blk_mq_free_tag_set(&nbd->tag_set);
 
 	/*
-	 * Place this in the last just before the nbd is freed to
-	 * make sure that the disk and the related kobject are also
-	 * totally removed to avoid duplicate creation of the same
-	 * one.
+	 * Remove from idr after del_gendisk() completes, so if the same ID is
+	 * reused, the following add_disk() will succeed.
 	 */
-	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
-		complete(nbd->destroy_complete);
+	mutex_lock(&nbd_index_mutex);
+	idr_remove(&nbd_index_idr, nbd->index);
+	mutex_unlock(&nbd_index_mutex);
 
 	kfree(nbd);
 }
 
+static void nbd_dev_remove_work(struct work_struct *work)
+{
+	nbd_dev_remove(container_of(work, struct nbd_device, remove_work));
+}
+
 static void nbd_put(struct nbd_device *nbd)
 {
-	if (refcount_dec_and_mutex_lock(&nbd->refs,
-					&nbd_index_mutex)) {
-		idr_remove(&nbd_index_idr, nbd->index);
+	if (!refcount_dec_and_test(&nbd->refs))
+		return;
+
+	/* Call del_gendisk() asynchrounously to prevent deadlock */
+	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
+		queue_work(nbd_del_wq, &nbd->remove_work);
+	else
 		nbd_dev_remove(nbd);
-		mutex_unlock(&nbd_index_mutex);
-	}
 }
 
 static int nbd_disconnected(struct nbd_config *config)
@@ -1388,6 +1393,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
 	struct nbd_config *config = nbd->config;
+	loff_t bytesize;
 
 	switch (cmd) {
 	case NBD_DISCONNECT:
@@ -1402,8 +1408,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_SET_SIZE:
 		return nbd_set_size(nbd, arg, config->blksize);
 	case NBD_SET_SIZE_BLOCKS:
-		return nbd_set_size(nbd, arg * config->blksize,
-				    config->blksize);
+		if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+			return -EINVAL;
+		return nbd_set_size(nbd, bytesize, config->blksize);
 	case NBD_SET_TIMEOUT:
 		nbd_set_cmd_timeout(nbd, arg);
 		return 0;
@@ -1665,7 +1672,7 @@ static const struct blk_mq_ops nbd_mq_ops = {
 	.timeout	= nbd_xmit_timeout,
 };
 
-static int nbd_dev_add(int index)
+static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 {
 	struct nbd_device *nbd;
 	struct gendisk *disk;
@@ -1683,13 +1690,14 @@ static int nbd_dev_add(int index)
 	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 	nbd->tag_set.driver_data = nbd;
-	nbd->destroy_complete = NULL;
+	INIT_WORK(&nbd->remove_work, nbd_dev_remove_work);
 	nbd->backend = NULL;
 
 	err = blk_mq_alloc_tag_set(&nbd->tag_set);
 	if (err)
 		goto out_free_nbd;
 
+	mutex_lock(&nbd_index_mutex);
 	if (index >= 0) {
 		err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
 				GFP_KERNEL);
@@ -1700,9 +1708,10 @@ static int nbd_dev_add(int index)
 		if (err >= 0)
 			index = err;
 	}
+	nbd->index = index;
+	mutex_unlock(&nbd_index_mutex);
 	if (err < 0)
 		goto out_free_tags;
-	nbd->index = index;
 
 	disk = blk_mq_alloc_disk(&nbd->tag_set, NULL);
 	if (IS_ERR(disk)) {
@@ -1726,38 +1735,65 @@ static int nbd_dev_add(int index)
 	mutex_init(&nbd->config_lock);
 	refcount_set(&nbd->config_refs, 0);
-	refcount_set(&nbd->refs, 1);
+	/*
+	 * Start out with a zero references to keep other threads from using
+	 * this device until it is fully initialized.
+	 */
+	refcount_set(&nbd->refs, 0);
 	INIT_LIST_HEAD(&nbd->list);
 	disk->major = NBD_MAJOR;
+
+	/* Too big first_minor can cause duplicate creation of
+	 * sysfs files/links, since first_minor will be truncated to
+	 * byte in __device_add_disk().
+	 */
 	disk->first_minor = index << part_shift;
+	if (disk->first_minor > 0xff) {
+		err = -EINVAL;
+		goto out_free_idr;
+	}
+
 	disk->minors = 1 << part_shift;
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
 	sprintf(disk->disk_name, "nbd%d", index);
 	add_disk(disk);
+
+	/*
+	 * Now publish the device.
+	 */
+	refcount_set(&nbd->refs, refs);
 	nbd_total_devices++;
-	return index;
+	return nbd;
 
 out_free_idr:
+	mutex_lock(&nbd_index_mutex);
 	idr_remove(&nbd_index_idr, index);
+	mutex_unlock(&nbd_index_mutex);
out_free_tags:
 	blk_mq_free_tag_set(&nbd->tag_set);
out_free_nbd:
 	kfree(nbd);
out:
-	return err;
+	return ERR_PTR(err);
 }
 
-static int find_free_cb(int id, void *ptr, void *data)
+static struct nbd_device *nbd_find_get_unused(void)
 {
-	struct nbd_device *nbd = ptr;
-	struct nbd_device **found = data;
+	struct nbd_device *nbd;
+	int id;
 
-	if (!refcount_read(&nbd->config_refs)) {
-		*found = nbd;
-		return 1;
+	lockdep_assert_held(&nbd_index_mutex);
+
+	idr_for_each_entry(&nbd_index_idr, nbd, id) {
+		if (refcount_read(&nbd->config_refs) ||
+		    test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags))
+			continue;
+		if (refcount_inc_not_zero(&nbd->refs))
+			return nbd;
 	}
-	return 0;
+
+	return NULL;
 }
 
 /* Netlink interface. */
@@ -1806,8 +1842,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
 
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
-	DECLARE_COMPLETION_ONSTACK(destroy_complete);
-	struct nbd_device *nbd = NULL;
+	struct nbd_device *nbd;
 	struct nbd_config *config;
 	int index = -1;
 	int ret;
@@ -1829,55 +1864,29 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 again:
 	mutex_lock(&nbd_index_mutex);
 	if (index == -1) {
-		ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd);
-		if (ret == 0) {
-			int new_index;
-			new_index = nbd_dev_add(-1);
-			if (new_index < 0) {
-				mutex_unlock(&nbd_index_mutex);
-				printk(KERN_ERR "nbd: failed to add new device\n");
-				return new_index;
-			}
-			nbd = idr_find(&nbd_index_idr, new_index);
-		}
+		nbd = nbd_find_get_unused();
 	} else {
 		nbd = idr_find(&nbd_index_idr, index);
-		if (!nbd) {
-			ret = nbd_dev_add(index);
-			if (ret < 0) {
+		if (nbd) {
+			if ((test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+			     test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
+			    !refcount_inc_not_zero(&nbd->refs)) {
 				mutex_unlock(&nbd_index_mutex);
-				printk(KERN_ERR "nbd: failed to add new device\n");
-				return ret;
+				pr_err("nbd: device at index %d is going down\n",
+				       index);
+				return -EINVAL;
 			}
-			nbd = idr_find(&nbd_index_idr, index);
 		}
 	}
-	if (!nbd) {
-		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
-		       index);
-		mutex_unlock(&nbd_index_mutex);
-		return -EINVAL;
-	}
-
-	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
-	    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
-		nbd->destroy_complete = &destroy_complete;
-		mutex_unlock(&nbd_index_mutex);
-
-		/* Wait untill the the nbd stuff is totally destroyed */
-		wait_for_completion(&destroy_complete);
-		goto again;
-	}
+	mutex_unlock(&nbd_index_mutex);
 
-	if (!refcount_inc_not_zero(&nbd->refs)) {
-		mutex_unlock(&nbd_index_mutex);
-		if (index == -1)
-			goto again;
-		printk(KERN_ERR "nbd: device at index %d is going down\n",
-		       index);
-		return -EINVAL;
+	if (!nbd) {
+		nbd = nbd_dev_add(index, 2);
+		if (IS_ERR(nbd)) {
+			pr_err("nbd: failed to add new device\n");
+			return PTR_ERR(nbd);
+		}
 	}
-	mutex_unlock(&nbd_index_mutex);
 
 	mutex_lock(&nbd->config_lock);
 	if (refcount_read(&nbd->config_refs)) {
@@ -2424,16 +2433,21 @@ static int __init nbd_init(void)
 	if (register_blkdev(NBD_MAJOR, "nbd"))
 		return -EIO;
 
+	nbd_del_wq = alloc_workqueue("nbd-del", WQ_UNBOUND, 0);
+	if (!nbd_del_wq) {
+		unregister_blkdev(NBD_MAJOR, "nbd");
+		return -ENOMEM;
+	}
+
 	if (genl_register_family(&nbd_genl_family)) {
+		destroy_workqueue(nbd_del_wq);
 		unregister_blkdev(NBD_MAJOR, "nbd");
 		return -EINVAL;
 	}
 
 	nbd_dbg_init();
 
-	mutex_lock(&nbd_index_mutex);
 	for (i = 0; i < nbds_max; i++)
-		nbd_dev_add(i);
-	mutex_unlock(&nbd_index_mutex);
+		nbd_dev_add(i, 1);
 	return 0;
 }
 
@@ -2442,7 +2456,10 @@ static int nbd_exit_cb(int id, void *ptr, void *data)
 	struct list_head *list = (struct list_head *)data;
 	struct nbd_device *nbd = ptr;
 
-	list_add_tail(&nbd->list, list);
+	/* Skip nbd that is being removed asynchronously */
+	if (refcount_read(&nbd->refs))
+		list_add_tail(&nbd->list, list);
+
 	return 0;
 }
 
@@ -2465,6 +2482,9 @@ static void __exit nbd_cleanup(void)
 		nbd_put(nbd);
 	}
 
+	/* Also wait for nbd_dev_remove_work() completes */
+	destroy_workqueue(nbd_del_wq);
+
 	idr_destroy(&nbd_index_idr);
 	genl_unregister_family(&nbd_genl_family);
 	unregister_blkdev(NBD_MAJOR, "nbd");
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 324afdd63a96..4b93fd83bf79 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -227,17 +227,17 @@ static ssize_t state_show(struct kobject *kobj,
 
 	switch (dev->dev_state) {
 	case DEV_STATE_INIT:
-		return snprintf(page, PAGE_SIZE, "init\n");
+		return sysfs_emit(page, "init\n");
 	case DEV_STATE_MAPPED:
 		/* TODO fix cli tool before changing to proper state */
-		return snprintf(page, PAGE_SIZE, "open\n");
+		return sysfs_emit(page, "open\n");
 	case DEV_STATE_MAPPED_DISCONNECTED:
 		/* TODO fix cli tool before changing to proper state */
-		return snprintf(page, PAGE_SIZE, "closed\n");
+		return sysfs_emit(page, "closed\n");
 	case DEV_STATE_UNMAPPED:
-		return snprintf(page, PAGE_SIZE, "unmapped\n");
+		return sysfs_emit(page, "unmapped\n");
 	default:
-		return snprintf(page, PAGE_SIZE, "unknown\n");
+		return sysfs_emit(page, "unknown\n");
 	}
 }
 
@@ -263,7 +263,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
 
 	dev = container_of(kobj, struct rnbd_clt_dev, kobj);
 
-	return scnprintf(page, PAGE_SIZE, "%s\n", dev->pathname);
+	return sysfs_emit(page, "%s\n", dev->pathname);
 }
 
 static struct kobj_attribute rnbd_clt_mapping_path_attr =
@@ -276,8 +276,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
 
 	dev = container_of(kobj, struct rnbd_clt_dev, kobj);
 
-	return snprintf(page, PAGE_SIZE, "%s\n",
-			rnbd_access_mode_str(dev->access_mode));
+	return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode));
 }
 
 static struct kobj_attribute rnbd_clt_access_mode =
@@ -286,8 +285,8 @@ static struct kobj_attribute rnbd_clt_access_mode =
 static ssize_t rnbd_clt_unmap_dev_show(struct kobject *kobj,
 				       struct kobj_attribute *attr, char *page)
 {
-	return scnprintf(page, PAGE_SIZE, "Usage: echo <normal|force> > %s\n",
-			 attr->attr.name);
+	return sysfs_emit(page, "Usage: echo <normal|force> > %s\n",
+			  attr->attr.name);
 }
 
 static ssize_t rnbd_clt_unmap_dev_store(struct kobject *kobj,
@@ -357,9 +356,8 @@ static ssize_t rnbd_clt_resize_dev_show(struct kobject *kobj,
 					struct kobj_attribute *attr,
 					char *page)
 {
-	return scnprintf(page, PAGE_SIZE,
-			 "Usage: echo <new size in sectors> > %s\n",
-			 attr->attr.name);
+	return sysfs_emit(page, "Usage: echo <new size in sectors> > %s\n",
+			  attr->attr.name);
 }
 
 static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
@@ -390,8 +388,7 @@ static struct kobj_attribute rnbd_clt_resize_dev_attr =
 static ssize_t rnbd_clt_remap_dev_show(struct kobject *kobj,
 				       struct kobj_attribute *attr, char *page)
 {
-	return scnprintf(page, PAGE_SIZE, "Usage: echo <1> > %s\n",
-			 attr->attr.name);
+	return sysfs_emit(page, "Usage: echo <1> > %s\n", attr->attr.name);
 }
 
 static ssize_t rnbd_clt_remap_dev_store(struct kobject *kobj,
@@ -436,7 +433,7 @@ static ssize_t session_show(struct kobject *kobj, struct kobj_attribute *attr,
 
 	dev = container_of(kobj, struct rnbd_clt_dev, kobj);
 
-	return scnprintf(page, PAGE_SIZE, "%s\n", dev->sess->sessname);
+	return sysfs_emit(page, "%s\n", dev->sess->sessname);
 }
 
 static struct kobj_attribute rnbd_clt_session_attr =
@@ -499,8 +496,8 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
 					struct kobj_attribute *attr,
 					char *page)
 {
-	return scnprintf(page, PAGE_SIZE,
-			 "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
+	return sysfs_emit(page,
+			  "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
 			  attr->attr.name);
 }
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index e9cc413495f0..bd4a41afbbfc 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -271,7 +271,7 @@ unlock:
 	 */
 	if (cpu_q)
 		*cpup = cpu_q->cpu;
-	put_cpu_var(sess->cpu_rr);
+	put_cpu_ptr(sess->cpu_rr);
 
 	if (q)
 		rnbd_clt_dev_requeue(q);
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index acf5fced11ef..4db98e0e76f0 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -90,8 +90,8 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr,
 
 	sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
 
-	return scnprintf(page, PAGE_SIZE, "%d\n",
-			 !(sess_dev->open_flags & FMODE_WRITE));
+	return sysfs_emit(page, "%d\n",
+			  !(sess_dev->open_flags & FMODE_WRITE));
 }
 
 static struct kobj_attribute rnbd_srv_dev_session_ro_attr =
@@ -105,8 +105,8 @@ static ssize_t access_mode_show(struct kobject *kobj,
 
 	sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
 
-	return scnprintf(page, PAGE_SIZE, "%s\n",
-			 rnbd_access_mode_str(sess_dev->access_mode));
+	return sysfs_emit(page, "%s\n",
+			  rnbd_access_mode_str(sess_dev->access_mode));
 }
 
 static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr =
@@ -119,7 +119,7 @@ static ssize_t mapping_path_show(struct kobject *kobj,
 
 	sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
 
-	return scnprintf(page, PAGE_SIZE, "%s\n", sess_dev->pathname);
+	return sysfs_emit(page, "%s\n", sess_dev->pathname);
 }
 
 static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
@@ -128,8 +128,8 @@ static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr =
 static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *page)
 {
-	return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n",
-			 attr->attr.name);
+	return sysfs_emit(page, "Usage: echo 1 > %s\n",
+			  attr->attr.name);
 }
 
 static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d83fee21f6c5..715bfa8aca7f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1092,7 +1092,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	err = xlbd_reserve_minors(minor, nr_minors);
 	if (err)
 		return err;
-	err = -ENODEV;
 
 	memset(&info->tag_set, 0, sizeof(info->tag_set));
 	info->tag_set.ops = &blkfront_mq_ops;
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
deleted file mode 100644
index 04caa0f2d445..000000000000
--- a/drivers/lightnvm/Kconfig
+++ /dev/null
@@ -1,44 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Open-Channel SSD NVM configuration
-#
-
-menuconfig NVM
-	bool "Open-Channel SSD target support (DEPRECATED)"
-	depends on BLOCK
-	help
-	  Say Y here to get to enable Open-channel SSDs.
-
-	  Open-Channel SSDs implement a set of extension to SSDs, that
-	  exposes direct access to the underlying non-volatile memory.
-
-	  If you say N, all options in this submenu will be skipped and disabled
-	  only do this if you know what you are doing.
-
-	  This code is deprecated and will be removed in Linux 5.15.
-
-if NVM
-
-config NVM_PBLK
-	tristate "Physical Block Device Open-Channel SSD target"
-	select CRC32
-	help
-	  Allows an open-channel SSD to be exposed as a block device to the
-	  host. The target assumes the device exposes raw flash and must be
-	  explicitly managed by the host.
-
-	  Please note the disk format is considered EXPERIMENTAL for now.
-
-if NVM_PBLK
-
-config NVM_PBLK_DEBUG
-	bool "PBlk Debug Support"
-	default n
-	help
-	  Enables debug support for pblk. This includes extra checks, more
-	  vocal error messages, and extra tracking fields in the pblk sysfs
-	  entries.
-
-endif # NVM_PBLK_DEBUG
-
-endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
deleted file mode 100644
index 97d9d7c71550..000000000000
--- a/drivers/lightnvm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for Open-Channel SSDs.
-#
-
-obj-$(CONFIG_NVM)		:= core.o
-obj-$(CONFIG_NVM_PBLK)		+= pblk.o
-pblk-y				:= pblk-init.o pblk-core.o pblk-rb.o \
-				   pblk-write.o pblk-cache.o pblk-read.o \
-				   pblk-gc.o pblk-recovery.o pblk-map.o \
-				   pblk-rl.o pblk-sysfs.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
deleted file mode 100644
index cf8a75494833..000000000000
--- a/drivers/lightnvm/core.c
+++ /dev/null
@@ -1,1440 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2015 IT University of Copenhagen. All rights reserved.
- * Initial release: Matias Bjorling <m@bjorling.me> - */ - -#define pr_fmt(fmt) "nvm: " fmt - -#include <linux/list.h> -#include <linux/types.h> -#include <linux/sem.h> -#include <linux/bitmap.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/miscdevice.h> -#include <linux/lightnvm.h> -#include <linux/sched/sysctl.h> - -static LIST_HEAD(nvm_tgt_types); -static DECLARE_RWSEM(nvm_tgtt_lock); -static LIST_HEAD(nvm_devices); -static DECLARE_RWSEM(nvm_lock); - -/* Map between virtual and physical channel and lun */ -struct nvm_ch_map { - int ch_off; - int num_lun; - int *lun_offs; -}; - -struct nvm_dev_map { - struct nvm_ch_map *chnls; - int num_ch; -}; - -static void nvm_free(struct kref *ref); - -static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) -{ - struct nvm_target *tgt; - - list_for_each_entry(tgt, &dev->targets, list) - if (!strcmp(name, tgt->disk->disk_name)) - return tgt; - - return NULL; -} - -static bool nvm_target_exists(const char *name) -{ - struct nvm_dev *dev; - struct nvm_target *tgt; - bool ret = false; - - down_write(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) { - mutex_lock(&dev->mlock); - list_for_each_entry(tgt, &dev->targets, list) { - if (!strcmp(name, tgt->disk->disk_name)) { - ret = true; - mutex_unlock(&dev->mlock); - goto out; - } - } - mutex_unlock(&dev->mlock); - } - -out: - up_write(&nvm_lock); - return ret; -} - -static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) -{ - int i; - - for (i = lun_begin; i <= lun_end; i++) { - if (test_and_set_bit(i, dev->lun_map)) { - pr_err("lun %d already allocated\n", i); - goto err; - } - } - - return 0; -err: - while (--i >= lun_begin) - clear_bit(i, dev->lun_map); - - return -EBUSY; -} - -static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin, - int lun_end) -{ - int i; - - for (i = lun_begin; i <= lun_end; i++) - WARN_ON(!test_and_clear_bit(i, dev->lun_map)); -} - -static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_dev_map *dev_map = tgt_dev->map; - int i, j; - - for (i = 0; i < dev_map->num_ch; i++) { - struct nvm_ch_map *ch_map = &dev_map->chnls[i]; - int *lun_offs = ch_map->lun_offs; - int ch = i + ch_map->ch_off; - - if (clear) { - for (j = 0; j < ch_map->num_lun; j++) { - int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.num_lun) + lun; - - WARN_ON(!test_and_clear_bit(lunid, - dev->lun_map)); - } - } - - kfree(ch_map->lun_offs); - } - - kfree(dev_map->chnls); - kfree(dev_map); - - kfree(tgt_dev->luns); - kfree(tgt_dev); -} - -static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, - u16 lun_begin, u16 lun_end, - u16 op) -{ - struct nvm_tgt_dev *tgt_dev = NULL; - struct nvm_dev_map *dev_rmap = dev->rmap; - struct nvm_dev_map *dev_map; - struct ppa_addr *luns; - int num_lun = lun_end - lun_begin + 1; - int luns_left = num_lun; - int num_ch = num_lun / dev->geo.num_lun; - int num_ch_mod = num_lun % dev->geo.num_lun; - int bch = lun_begin / dev->geo.num_lun; - int blun = lun_begin % dev->geo.num_lun; - int lunid = 0; - int lun_balanced = 1; - int sec_per_lun, prev_num_lun; - int i, j; - - num_ch = (num_ch_mod == 0) ? 
num_ch : num_ch + 1; - - dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); - if (!dev_map) - goto err_dev; - - dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL); - if (!dev_map->chnls) - goto err_chnls; - - luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL); - if (!luns) - goto err_luns; - - prev_num_lun = (luns_left > dev->geo.num_lun) ? - dev->geo.num_lun : luns_left; - for (i = 0; i < num_ch; i++) { - struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; - int *lun_roffs = ch_rmap->lun_offs; - struct nvm_ch_map *ch_map = &dev_map->chnls[i]; - int *lun_offs; - int luns_in_chnl = (luns_left > dev->geo.num_lun) ? - dev->geo.num_lun : luns_left; - - if (lun_balanced && prev_num_lun != luns_in_chnl) - lun_balanced = 0; - - ch_map->ch_off = ch_rmap->ch_off = bch; - ch_map->num_lun = luns_in_chnl; - - lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); - if (!lun_offs) - goto err_ch; - - for (j = 0; j < luns_in_chnl; j++) { - luns[lunid].ppa = 0; - luns[lunid].a.ch = i; - luns[lunid++].a.lun = j; - - lun_offs[j] = blun; - lun_roffs[j + blun] = blun; - } - - ch_map->lun_offs = lun_offs; - - /* when starting a new channel, lun offset is reset */ - blun = 0; - luns_left -= luns_in_chnl; - } - - dev_map->num_ch = num_ch; - - tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); - if (!tgt_dev) - goto err_ch; - - /* Inherit device geometry from parent */ - memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); - - /* Target device only owns a portion of the physical device */ - tgt_dev->geo.num_ch = num_ch; - tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1; - tgt_dev->geo.all_luns = num_lun; - tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk; - - tgt_dev->geo.op = op; - - sec_per_lun = dev->geo.clba * dev->geo.num_chk; - tgt_dev->geo.total_secs = num_lun * sec_per_lun; - - tgt_dev->q = dev->q; - tgt_dev->map = dev_map; - tgt_dev->luns = luns; - tgt_dev->parent = dev; - - return tgt_dev; -err_ch: - while (--i >= 0) - kfree(dev_map->chnls[i].lun_offs); - kfree(luns); -err_luns: - kfree(dev_map->chnls); -err_chnls: - kfree(dev_map); -err_dev: - return tgt_dev; -} - -static struct nvm_tgt_type *__nvm_find_target_type(const char *name) -{ - struct nvm_tgt_type *tt; - - list_for_each_entry(tt, &nvm_tgt_types, list) - if (!strcmp(name, tt->name)) - return tt; - - return NULL; -} - -static struct nvm_tgt_type *nvm_find_target_type(const char *name) -{ - struct nvm_tgt_type *tt; - - down_write(&nvm_tgtt_lock); - tt = __nvm_find_target_type(name); - up_write(&nvm_tgtt_lock); - - return tt; -} - -static int nvm_config_check_luns(struct nvm_geo *geo, int lun_begin, - int lun_end) -{ - if (lun_begin > lun_end || lun_end >= geo->all_luns) { - pr_err("lun out of bound (%u:%u > %u)\n", - lun_begin, lun_end, geo->all_luns - 1); - return -EINVAL; - } - - return 0; -} - -static int __nvm_config_simple(struct nvm_dev *dev, - struct nvm_ioctl_create_simple *s) -{ - struct nvm_geo *geo = &dev->geo; - - if (s->lun_begin == -1 && s->lun_end == -1) { - s->lun_begin = 0; - s->lun_end = geo->all_luns - 1; - } - - return nvm_config_check_luns(geo, s->lun_begin, s->lun_end); -} - -static int __nvm_config_extended(struct nvm_dev *dev, - struct nvm_ioctl_create_extended *e) -{ - if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) { - e->lun_begin = 0; - e->lun_end = dev->geo.all_luns - 1; - } - - /* op not set falls into target's default */ - if (e->op == 0xFFFF) { - e->op = NVM_TARGET_DEFAULT_OP; - } else if (e->op < NVM_TARGET_MIN_OP || e->op > 
NVM_TARGET_MAX_OP) { - pr_err("invalid over provisioning value\n"); - return -EINVAL; - } - - return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end); -} - -static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) -{ - struct nvm_ioctl_create_extended e; - struct gendisk *tdisk; - struct nvm_tgt_type *tt; - struct nvm_target *t; - struct nvm_tgt_dev *tgt_dev; - void *targetdata; - unsigned int mdts; - int ret; - - switch (create->conf.type) { - case NVM_CONFIG_TYPE_SIMPLE: - ret = __nvm_config_simple(dev, &create->conf.s); - if (ret) - return ret; - - e.lun_begin = create->conf.s.lun_begin; - e.lun_end = create->conf.s.lun_end; - e.op = NVM_TARGET_DEFAULT_OP; - break; - case NVM_CONFIG_TYPE_EXTENDED: - ret = __nvm_config_extended(dev, &create->conf.e); - if (ret) - return ret; - - e = create->conf.e; - break; - default: - pr_err("config type not valid\n"); - return -EINVAL; - } - - tt = nvm_find_target_type(create->tgttype); - if (!tt) { - pr_err("target type %s not found\n", create->tgttype); - return -EINVAL; - } - - if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) { - pr_err("device is incompatible with target L2P type.\n"); - return -EINVAL; - } - - if (nvm_target_exists(create->tgtname)) { - pr_err("target name already exists (%s)\n", - create->tgtname); - return -EINVAL; - } - - ret = nvm_reserve_luns(dev, e.lun_begin, e.lun_end); - if (ret) - return ret; - - t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) { - ret = -ENOMEM; - goto err_reserve; - } - - tgt_dev = nvm_create_tgt_dev(dev, e.lun_begin, e.lun_end, e.op); - if (!tgt_dev) { - pr_err("could not create target device\n"); - ret = -ENOMEM; - goto err_t; - } - - tdisk = blk_alloc_disk(dev->q->node); - if (!tdisk) { - ret = -ENOMEM; - goto err_dev; - } - - strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name)); - tdisk->major = 0; - tdisk->first_minor = 0; - tdisk->fops = tt->bops; - - targetdata = tt->init(tgt_dev, tdisk, create->flags); - if (IS_ERR(targetdata)) { - ret = PTR_ERR(targetdata); - goto err_init; - } - - tdisk->private_data = targetdata; - tdisk->queue->queuedata = targetdata; - - mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA; - if (dev->geo.mdts) { - mdts = min_t(u32, dev->geo.mdts, - (dev->geo.csecs >> 9) * NVM_MAX_VLBA); - } - blk_queue_max_hw_sectors(tdisk->queue, mdts); - - set_capacity(tdisk, tt->capacity(targetdata)); - add_disk(tdisk); - - if (tt->sysfs_init && tt->sysfs_init(tdisk)) { - ret = -ENOMEM; - goto err_sysfs; - } - - t->type = tt; - t->disk = tdisk; - t->dev = tgt_dev; - - mutex_lock(&dev->mlock); - list_add_tail(&t->list, &dev->targets); - mutex_unlock(&dev->mlock); - - __module_get(tt->owner); - - return 0; -err_sysfs: - if (tt->exit) - tt->exit(targetdata, true); -err_init: - blk_cleanup_disk(tdisk); -err_dev: - nvm_remove_tgt_dev(tgt_dev, 0); -err_t: - kfree(t); -err_reserve: - nvm_release_luns_err(dev, e.lun_begin, e.lun_end); - return ret; -} - -static void __nvm_remove_target(struct nvm_target *t, bool graceful) -{ - struct nvm_tgt_type *tt = t->type; - struct gendisk *tdisk = t->disk; - - del_gendisk(tdisk); - - if (tt->sysfs_exit) - tt->sysfs_exit(tdisk); - - if (tt->exit) - tt->exit(tdisk->private_data, graceful); - - nvm_remove_tgt_dev(t->dev, 1); - blk_cleanup_disk(tdisk); - module_put(t->type->owner); - - list_del(&t->list); - kfree(t); -} - -/** - * nvm_remove_tgt - Removes a target from the media manager - * @remove: ioctl structure with target name to remove. 
- * - * Returns: - * 0: on success - * 1: on not found - * <0: on error - */ -static int nvm_remove_tgt(struct nvm_ioctl_remove *remove) -{ - struct nvm_target *t = NULL; - struct nvm_dev *dev; - - down_read(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) { - mutex_lock(&dev->mlock); - t = nvm_find_target(dev, remove->tgtname); - if (t) { - mutex_unlock(&dev->mlock); - break; - } - mutex_unlock(&dev->mlock); - } - up_read(&nvm_lock); - - if (!t) { - pr_err("failed to remove target %s\n", - remove->tgtname); - return 1; - } - - __nvm_remove_target(t, true); - kref_put(&dev->ref, nvm_free); - - return 0; -} - -static int nvm_register_map(struct nvm_dev *dev) -{ - struct nvm_dev_map *rmap; - int i, j; - - rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); - if (!rmap) - goto err_rmap; - - rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map), - GFP_KERNEL); - if (!rmap->chnls) - goto err_chnls; - - for (i = 0; i < dev->geo.num_ch; i++) { - struct nvm_ch_map *ch_rmap; - int *lun_roffs; - int luns_in_chnl = dev->geo.num_lun; - - ch_rmap = &rmap->chnls[i]; - - ch_rmap->ch_off = -1; - ch_rmap->num_lun = luns_in_chnl; - - lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); - if (!lun_roffs) - goto err_ch; - - for (j = 0; j < luns_in_chnl; j++) - lun_roffs[j] = -1; - - ch_rmap->lun_offs = lun_roffs; - } - - dev->rmap = rmap; - - return 0; -err_ch: - while (--i >= 0) - kfree(rmap->chnls[i].lun_offs); -err_chnls: - kfree(rmap); -err_rmap: - return -ENOMEM; -} - -static void nvm_unregister_map(struct nvm_dev *dev) -{ - struct nvm_dev_map *rmap = dev->rmap; - int i; - - for (i = 0; i < dev->geo.num_ch; i++) - kfree(rmap->chnls[i].lun_offs); - - kfree(rmap->chnls); - kfree(rmap); -} - -static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) -{ - struct nvm_dev_map *dev_map = tgt_dev->map; - struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch]; - int lun_off = ch_map->lun_offs[p->a.lun]; - - p->a.ch += ch_map->ch_off; - p->a.lun += lun_off; -} - -static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_dev_map *dev_rmap = dev->rmap; - struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch]; - int lun_roff = ch_rmap->lun_offs[p->a.lun]; - - p->a.ch -= ch_rmap->ch_off; - p->a.lun -= lun_roff; -} - -static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr *ppa_list, int nr_ppas) -{ - int i; - - for (i = 0; i < nr_ppas; i++) { - nvm_map_to_dev(tgt_dev, &ppa_list[i]); - ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]); - } -} - -static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr *ppa_list, int nr_ppas) -{ - int i; - - for (i = 0; i < nr_ppas; i++) { - ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]); - nvm_map_to_tgt(tgt_dev, &ppa_list[i]); - } -} - -static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas); -} - -static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas); -} - -int nvm_register_tgt_type(struct nvm_tgt_type *tt) -{ - int ret = 0; - - down_write(&nvm_tgtt_lock); - if (__nvm_find_target_type(tt->name)) - ret = -EEXIST; - else - list_add(&tt->list, &nvm_tgt_types); - up_write(&nvm_tgtt_lock); - - return ret; -} 
-EXPORT_SYMBOL(nvm_register_tgt_type); - -void nvm_unregister_tgt_type(struct nvm_tgt_type *tt) -{ - if (!tt) - return; - - down_write(&nvm_tgtt_lock); - list_del(&tt->list); - up_write(&nvm_tgtt_lock); -} -EXPORT_SYMBOL(nvm_unregister_tgt_type); - -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, - dma_addr_t *dma_handler) -{ - return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags, - dma_handler); -} -EXPORT_SYMBOL(nvm_dev_dma_alloc); - -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) -{ - dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler); -} -EXPORT_SYMBOL(nvm_dev_dma_free); - -static struct nvm_dev *nvm_find_nvm_dev(const char *name) -{ - struct nvm_dev *dev; - - list_for_each_entry(dev, &nvm_devices, devices) - if (!strcmp(name, dev->name)) - return dev; - - return NULL; -} - -static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - const struct ppa_addr *ppas, int nr_ppas) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_geo *geo = &tgt_dev->geo; - int i, plane_cnt, pl_idx; - struct ppa_addr ppa; - - if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { - rqd->nr_ppas = nr_ppas; - rqd->ppa_addr = ppas[0]; - - return 0; - } - - rqd->nr_ppas = nr_ppas; - rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); - if (!rqd->ppa_list) { - pr_err("failed to allocate dma memory\n"); - return -ENOMEM; - } - - plane_cnt = geo->pln_mode; - rqd->nr_ppas *= plane_cnt; - - for (i = 0; i < nr_ppas; i++) { - for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppa = ppas[i]; - ppa.g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; - } - } - - return 0; -} - -static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, - struct nvm_rq *rqd) -{ - if (!rqd->ppa_list) - return; - - nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); -} - -static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd) -{ - int flags = 0; - - if (geo->version == NVM_OCSSD_SPEC_20) - return 0; - - if (rqd->is_seq) - flags |= geo->pln_mode >> 1; - - if (rqd->opcode == NVM_OP_PREAD) - flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND); - else if (rqd->opcode == NVM_OP_PWRITE) - flags |= NVM_IO_SCRAMBLE_ENABLE; - - return flags; -} - -int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, void *buf) -{ - struct nvm_dev *dev = tgt_dev->parent; - int ret; - - if (!dev->ops->submit_io) - return -ENODEV; - - nvm_rq_tgt_to_dev(tgt_dev, rqd); - - rqd->dev = tgt_dev; - rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); - - /* In case of error, fail with right address format */ - ret = dev->ops->submit_io(dev, rqd, buf); - if (ret) - nvm_rq_dev_to_tgt(tgt_dev, rqd); - return ret; -} -EXPORT_SYMBOL(nvm_submit_io); - -static void nvm_sync_end_io(struct nvm_rq *rqd) -{ - struct completion *waiting = rqd->private; - - complete(waiting); -} - -static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd, - void *buf) -{ - DECLARE_COMPLETION_ONSTACK(wait); - int ret = 0; - - rqd->end_io = nvm_sync_end_io; - rqd->private = &wait; - - ret = dev->ops->submit_io(dev, rqd, buf); - if (ret) - return ret; - - wait_for_completion_io(&wait); - - return 0; -} - -int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - void *buf) -{ - struct nvm_dev *dev = tgt_dev->parent; - int ret; - - if (!dev->ops->submit_io) - return -ENODEV; - - nvm_rq_tgt_to_dev(tgt_dev, rqd); - - rqd->dev = tgt_dev; - rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd); - - ret = nvm_submit_io_wait(dev, rqd, 
buf); - - return ret; -} -EXPORT_SYMBOL(nvm_submit_io_sync); - -void nvm_end_io(struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *tgt_dev = rqd->dev; - - /* Convert address space */ - if (tgt_dev) - nvm_rq_dev_to_tgt(tgt_dev, rqd); - - if (rqd->end_io) - rqd->end_io(rqd); -} -EXPORT_SYMBOL(nvm_end_io); - -static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - if (!dev->ops->submit_io) - return -ENODEV; - - rqd->dev = NULL; - rqd->flags = nvm_set_flags(&dev->geo, rqd); - - return nvm_submit_io_wait(dev, rqd, NULL); -} - -static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) -{ - struct nvm_rq rqd = { NULL }; - struct bio bio; - struct bio_vec bio_vec; - struct page *page; - int ret; - - page = alloc_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - bio_init(&bio, &bio_vec, 1); - bio_add_page(&bio, page, PAGE_SIZE, 0); - bio_set_op_attrs(&bio, REQ_OP_READ, 0); - - rqd.bio = &bio; - rqd.opcode = NVM_OP_PREAD; - rqd.is_seq = 1; - rqd.nr_ppas = 1; - rqd.ppa_addr = generic_to_dev_addr(dev, ppa); - - ret = nvm_submit_io_sync_raw(dev, &rqd); - __free_page(page); - if (ret) - return ret; - - return rqd.error; -} - -/* - * Scans a 1.2 chunk first and last page to determine if its state. - * If the chunk is found to be open, also scan it to update the write - * pointer. - */ -static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa, - struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &dev->geo; - int ret, pg, pl; - - /* sense first page */ - ret = nvm_bb_chunk_sense(dev, ppa); - if (ret < 0) /* io error */ - return ret; - else if (ret == 0) /* valid data */ - meta->state = NVM_CHK_ST_OPEN; - else if (ret > 0) { - /* - * If empty page, the chunk is free, else it is an - * actual io error. In that case, mark it offline. - */ - switch (ret) { - case NVM_RSP_ERR_EMPTYPAGE: - meta->state = NVM_CHK_ST_FREE; - return 0; - case NVM_RSP_ERR_FAILCRC: - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_WARN_HIGHECC: - meta->state = NVM_CHK_ST_OPEN; - goto scan; - default: - return -ret; /* other io error */ - } - } - - /* sense last page */ - ppa.g.pg = geo->num_pg - 1; - ppa.g.pl = geo->num_pln - 1; - - ret = nvm_bb_chunk_sense(dev, ppa); - if (ret < 0) /* io error */ - return ret; - else if (ret == 0) { /* Chunk fully written */ - meta->state = NVM_CHK_ST_CLOSED; - meta->wp = geo->clba; - return 0; - } else if (ret > 0) { - switch (ret) { - case NVM_RSP_ERR_EMPTYPAGE: - case NVM_RSP_ERR_FAILCRC: - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_WARN_HIGHECC: - meta->state = NVM_CHK_ST_OPEN; - break; - default: - return -ret; /* other io error */ - } - } - -scan: - /* - * chunk is open, we scan sequentially to update the write pointer. - * We make the assumption that targets write data across all planes - * before moving to the next page. - */ - for (pg = 0; pg < geo->num_pg; pg++) { - for (pl = 0; pl < geo->num_pln; pl++) { - ppa.g.pg = pg; - ppa.g.pl = pl; - - ret = nvm_bb_chunk_sense(dev, ppa); - if (ret < 0) /* io error */ - return ret; - else if (ret == 0) { - meta->wp += geo->ws_min; - } else if (ret > 0) { - switch (ret) { - case NVM_RSP_ERR_EMPTYPAGE: - return 0; - case NVM_RSP_ERR_FAILCRC: - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_WARN_HIGHECC: - meta->wp += geo->ws_min; - break; - default: - return -ret; /* other io error */ - } - } - } - } - - return 0; -} - -/* - * folds a bad block list from its plane representation to its - * chunk representation. - * - * If any of the planes status are bad or grown bad, the chunk is marked - * offline. 
If not bad, the first plane state acts as the chunk state. - */ -static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa, - u8 *blks, int nr_blks, struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &dev->geo; - int ret, blk, pl, offset, blktype; - - for (blk = 0; blk < geo->num_chk; blk++) { - offset = blk * geo->pln_mode; - blktype = blks[offset]; - - for (pl = 0; pl < geo->pln_mode; pl++) { - if (blks[offset + pl] & - (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { - blktype = blks[offset + pl]; - break; - } - } - - ppa.g.blk = blk; - - meta->wp = 0; - meta->type = NVM_CHK_TP_W_SEQ; - meta->wi = 0; - meta->slba = generic_to_dev_addr(dev, ppa).ppa; - meta->cnlb = dev->geo.clba; - - if (blktype == NVM_BLK_T_FREE) { - ret = nvm_bb_chunk_scan(dev, ppa, meta); - if (ret) - return ret; - } else { - meta->state = NVM_CHK_ST_OFFLINE; - } - - meta++; - } - - return 0; -} - -static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, - int nchks, struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - u8 *blks; - int ch, lun, nr_blks; - int ret = 0; - - ppa.ppa = slba; - ppa = dev_to_generic_addr(dev, ppa); - - if (ppa.g.blk != 0) - return -EINVAL; - - if ((nchks % geo->num_chk) != 0) - return -EINVAL; - - nr_blks = geo->num_chk * geo->pln_mode; - - blks = kmalloc(nr_blks, GFP_KERNEL); - if (!blks) - return -ENOMEM; - - for (ch = ppa.g.ch; ch < geo->num_ch; ch++) { - for (lun = ppa.g.lun; lun < geo->num_lun; lun++) { - struct ppa_addr ppa_gen, ppa_dev; - - if (!nchks) - goto done; - - ppa_gen.ppa = 0; - ppa_gen.g.ch = ch; - ppa_gen.g.lun = lun; - ppa_dev = generic_to_dev_addr(dev, ppa_gen); - - ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks); - if (ret) - goto done; - - ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks, - meta); - if (ret) - goto done; - - meta += geo->num_chk; - nchks -= geo->num_chk; - } - } -done: - kfree(blks); - return ret; -} - -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, - int nchks, struct nvm_chk_meta *meta) -{ - struct nvm_dev *dev = tgt_dev->parent; - - nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); - - if (dev->geo.version == NVM_OCSSD_SPEC_12) - return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta); - - return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta); -} -EXPORT_SYMBOL_GPL(nvm_get_chunk_meta); - -int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_rq rqd; - int ret; - - if (dev->geo.version == NVM_OCSSD_SPEC_20) - return 0; - - if (nr_ppas > NVM_MAX_VLBA) { - pr_err("unable to update all blocks atomically\n"); - return -EINVAL; - } - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - nvm_rq_tgt_to_dev(tgt_dev, &rqd); - - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(tgt_dev, &rqd); - if (ret) - return -EINVAL; - - return 0; -} -EXPORT_SYMBOL_GPL(nvm_set_chunk_meta); - -static int nvm_core_init(struct nvm_dev *dev) -{ - struct nvm_geo *geo = &dev->geo; - int ret; - - dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns), - sizeof(unsigned long), GFP_KERNEL); - if (!dev->lun_map) - return -ENOMEM; - - INIT_LIST_HEAD(&dev->area_list); - INIT_LIST_HEAD(&dev->targets); - mutex_init(&dev->mlock); - spin_lock_init(&dev->lock); - - ret = nvm_register_map(dev); - if (ret) - goto err_fmtype; - - return 0; -err_fmtype: - kfree(dev->lun_map); - return ret; -} - -static void nvm_free(struct kref *ref) -{ 
- struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref); - - if (dev->dma_pool) - dev->ops->destroy_dma_pool(dev->dma_pool); - - if (dev->rmap) - nvm_unregister_map(dev); - - kfree(dev->lun_map); - kfree(dev); -} - -static int nvm_init(struct nvm_dev *dev) -{ - struct nvm_geo *geo = &dev->geo; - int ret = -EINVAL; - - if (dev->ops->identity(dev)) { - pr_err("device could not be identified\n"); - goto err; - } - - pr_debug("ver:%u.%u nvm_vendor:%x\n", geo->major_ver_id, - geo->minor_ver_id, geo->vmnt); - - ret = nvm_core_init(dev); - if (ret) { - pr_err("could not initialize core structures.\n"); - goto err; - } - - pr_info("registered %s [%u/%u/%u/%u/%u]\n", - dev->name, dev->geo.ws_min, dev->geo.ws_opt, - dev->geo.num_chk, dev->geo.all_luns, - dev->geo.num_ch); - return 0; -err: - pr_err("failed to initialize nvm\n"); - return ret; -} - -struct nvm_dev *nvm_alloc_dev(int node) -{ - struct nvm_dev *dev; - - dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); - if (dev) - kref_init(&dev->ref); - - return dev; -} -EXPORT_SYMBOL(nvm_alloc_dev); - -int nvm_register(struct nvm_dev *dev) -{ - int ret, exp_pool_size; - - pr_warn_once("lightnvm support is deprecated and will be removed in Linux 5.15.\n"); - - if (!dev->q || !dev->ops) { - kref_put(&dev->ref, nvm_free); - return -EINVAL; - } - - ret = nvm_init(dev); - if (ret) { - kref_put(&dev->ref, nvm_free); - return ret; - } - - exp_pool_size = max_t(int, PAGE_SIZE, - (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); - exp_pool_size = round_up(exp_pool_size, PAGE_SIZE); - - dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", - exp_pool_size); - if (!dev->dma_pool) { - pr_err("could not create dma pool\n"); - kref_put(&dev->ref, nvm_free); - return -ENOMEM; - } - - /* register device with a supported media manager */ - down_write(&nvm_lock); - list_add(&dev->devices, &nvm_devices); - up_write(&nvm_lock); - - return 0; -} -EXPORT_SYMBOL(nvm_register); - -void nvm_unregister(struct nvm_dev *dev) -{ - struct nvm_target *t, *tmp; - - mutex_lock(&dev->mlock); - list_for_each_entry_safe(t, tmp, &dev->targets, list) { - if (t->dev->parent != dev) - continue; - __nvm_remove_target(t, false); - kref_put(&dev->ref, nvm_free); - } - mutex_unlock(&dev->mlock); - - down_write(&nvm_lock); - list_del(&dev->devices); - up_write(&nvm_lock); - - kref_put(&dev->ref, nvm_free); -} -EXPORT_SYMBOL(nvm_unregister); - -static int __nvm_configure_create(struct nvm_ioctl_create *create) -{ - struct nvm_dev *dev; - int ret; - - down_write(&nvm_lock); - dev = nvm_find_nvm_dev(create->dev); - up_write(&nvm_lock); - - if (!dev) { - pr_err("device not found\n"); - return -EINVAL; - } - - kref_get(&dev->ref); - ret = nvm_create_tgt(dev, create); - if (ret) - kref_put(&dev->ref, nvm_free); - - return ret; -} - -static long nvm_ioctl_info(struct file *file, void __user *arg) -{ - struct nvm_ioctl_info *info; - struct nvm_tgt_type *tt; - int tgt_iter = 0; - - info = memdup_user(arg, sizeof(struct nvm_ioctl_info)); - if (IS_ERR(info)) - return PTR_ERR(info); - - info->version[0] = NVM_VERSION_MAJOR; - info->version[1] = NVM_VERSION_MINOR; - info->version[2] = NVM_VERSION_PATCH; - - down_write(&nvm_tgtt_lock); - list_for_each_entry(tt, &nvm_tgt_types, list) { - struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; - - tgt->version[0] = tt->version[0]; - tgt->version[1] = tt->version[1]; - tgt->version[2] = tt->version[2]; - strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX); - - tgt_iter++; - } - - info->tgtsize = tgt_iter; - up_write(&nvm_tgtt_lock); - - if 
(copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) { - kfree(info); - return -EFAULT; - } - - kfree(info); - return 0; -} - -static long nvm_ioctl_get_devices(struct file *file, void __user *arg) -{ - struct nvm_ioctl_get_devices *devices; - struct nvm_dev *dev; - int i = 0; - - devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL); - if (!devices) - return -ENOMEM; - - down_write(&nvm_lock); - list_for_each_entry(dev, &nvm_devices, devices) { - struct nvm_ioctl_device_info *info = &devices->info[i]; - - strlcpy(info->devname, dev->name, sizeof(info->devname)); - - /* kept for compatibility */ - info->bmversion[0] = 1; - info->bmversion[1] = 0; - info->bmversion[2] = 0; - strlcpy(info->bmname, "gennvm", sizeof(info->bmname)); - i++; - - if (i >= ARRAY_SIZE(devices->info)) { - pr_err("max %zd devices can be reported.\n", - ARRAY_SIZE(devices->info)); - break; - } - } - up_write(&nvm_lock); - - devices->nr_devices = i; - - if (copy_to_user(arg, devices, - sizeof(struct nvm_ioctl_get_devices))) { - kfree(devices); - return -EFAULT; - } - - kfree(devices); - return 0; -} - -static long nvm_ioctl_dev_create(struct file *file, void __user *arg) -{ - struct nvm_ioctl_create create; - - if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) - return -EFAULT; - - if (create.conf.type == NVM_CONFIG_TYPE_EXTENDED && - create.conf.e.rsv != 0) { - pr_err("reserved config field in use\n"); - return -EINVAL; - } - - create.dev[DISK_NAME_LEN - 1] = '\0'; - create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0'; - create.tgtname[DISK_NAME_LEN - 1] = '\0'; - - if (create.flags != 0) { - __u32 flags = create.flags; - - /* Check for valid flags */ - if (flags & NVM_TARGET_FACTORY) - flags &= ~NVM_TARGET_FACTORY; - - if (flags) { - pr_err("flag not supported\n"); - return -EINVAL; - } - } - - return __nvm_configure_create(&create); -} - -static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) -{ - struct nvm_ioctl_remove remove; - - if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) - return -EFAULT; - - remove.tgtname[DISK_NAME_LEN - 1] = '\0'; - - if (remove.flags != 0) { - pr_err("no flags supported\n"); - return -EINVAL; - } - - return nvm_remove_tgt(&remove); -} - -/* kept for compatibility reasons */ -static long nvm_ioctl_dev_init(struct file *file, void __user *arg) -{ - struct nvm_ioctl_dev_init init; - - if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init))) - return -EFAULT; - - if (init.flags != 0) { - pr_err("no flags supported\n"); - return -EINVAL; - } - - return 0; -} - -/* Kept for compatibility reasons */ -static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) -{ - struct nvm_ioctl_dev_factory fact; - - if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory))) - return -EFAULT; - - fact.dev[DISK_NAME_LEN - 1] = '\0'; - - if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) - return -EINVAL; - - return 0; -} - -static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - switch (cmd) { - case NVM_INFO: - return nvm_ioctl_info(file, argp); - case NVM_GET_DEVICES: - return nvm_ioctl_get_devices(file, argp); - case NVM_DEV_CREATE: - return nvm_ioctl_dev_create(file, argp); - case NVM_DEV_REMOVE: - return nvm_ioctl_dev_remove(file, argp); - case NVM_DEV_INIT: - return nvm_ioctl_dev_init(file, argp); - case NVM_DEV_FACTORY: - return nvm_ioctl_dev_factory(file, argp); - } - return 0; -} - -static const 
struct file_operations _ctl_fops = { - .open = nonseekable_open, - .unlocked_ioctl = nvm_ctl_ioctl, - .owner = THIS_MODULE, - .llseek = noop_llseek, -}; - -static struct miscdevice _nvm_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "lightnvm", - .nodename = "lightnvm/control", - .fops = &_ctl_fops, -}; -builtin_misc_device(_nvm_misc); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c deleted file mode 100644 index f185f1a00008..000000000000 --- a/drivers/lightnvm/pblk-cache.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-cache.c - pblk's write cache - */ - -#include "pblk.h" - -void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, - unsigned long flags) -{ - struct pblk_w_ctx w_ctx; - sector_t lba = pblk_get_lba(bio); - unsigned long start_time; - unsigned int bpos, pos; - int nr_entries = pblk_get_secs(bio); - int i, ret; - - start_time = bio_start_io_acct(bio); - - /* Update the write buffer head (mem) with the entries that we can - * write. The write in itself cannot fail, so there is no need to - * rollback from here on. - */ -retry: - ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); - switch (ret) { - case NVM_IO_REQUEUE: - io_schedule(); - goto retry; - case NVM_IO_ERR: - pblk_pipeline_stop(pblk); - bio_io_error(bio); - goto out; - } - - pblk_ppa_set_empty(&w_ctx.ppa); - w_ctx.flags = flags; - if (bio->bi_opf & REQ_PREFLUSH) { - w_ctx.flags |= PBLK_FLUSH_ENTRY; - pblk_write_kick(pblk); - } - - if (unlikely(!bio_has_data(bio))) - goto out; - - for (i = 0; i < nr_entries; i++) { - void *data = bio_data(bio); - - w_ctx.lba = lba + i; - - pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i); - pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos); - - bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); - } - - atomic64_add(nr_entries, &pblk->user_wa); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(nr_entries, &pblk->inflight_writes); - atomic_long_add(nr_entries, &pblk->req_writes); -#endif - - pblk_rl_inserted(&pblk->rl, nr_entries); - -out: - bio_end_io_acct(bio, start_time); - pblk_write_should_kick(pblk); - - if (ret == NVM_IO_DONE) - bio_endio(bio); -} - -/* - * On GC the incoming lbas are not necessarily sequential. Also, some of the - * lbas might not be valid entries, which are marked as empty by the GC thread - */ -int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq) -{ - struct pblk_w_ctx w_ctx; - unsigned int bpos, pos; - void *data = gc_rq->data; - int i, valid_entries; - - /* Update the write buffer head (mem) with the entries that we can - * write. The write in itself cannot fail, so there is no need to - * rollback from here on. 
- */ -retry: - if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) { - io_schedule(); - goto retry; - } - - w_ctx.flags = PBLK_IOTYPE_GC; - pblk_ppa_set_empty(&w_ctx.ppa); - - for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) { - if (gc_rq->lba_list[i] == ADDR_EMPTY) - continue; - - w_ctx.lba = gc_rq->lba_list[i]; - - pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries); - pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line, - gc_rq->paddr_list[i], pos); - - data += PBLK_EXPOSED_PAGE_SIZE; - valid_entries++; - } - - WARN_ONCE(gc_rq->secs_to_gc != valid_entries, - "pblk: inconsistent GC write\n"); - - atomic64_add(valid_entries, &pblk->gc_wa); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(valid_entries, &pblk->inflight_writes); - atomic_long_add(valid_entries, &pblk->recov_gc_writes); -#endif - - pblk_write_should_kick(pblk); - return NVM_IO_OK; -} diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c deleted file mode 100644 index 33d39d3dd343..000000000000 --- a/drivers/lightnvm/pblk-core.c +++ /dev/null @@ -1,2151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-core.c - pblk's core functionality - * - */ - -#define CREATE_TRACE_POINTS - -#include "pblk.h" -#include "pblk-trace.h" - -static void pblk_line_mark_bb(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa = line_ws->priv; - int ret; - - ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); - if (ret) { - struct pblk_line *line; - int pos; - - line = pblk_ppa_to_line(pblk, *ppa); - pos = pblk_ppa_to_pos(&dev->geo, *ppa); - - pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n", - line->id, pos); - } - - kfree(ppa); - mempool_free(line_ws, &pblk->gen_ws_pool); -} - -static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, - struct ppa_addr ppa_addr) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa; - int pos = pblk_ppa_to_pos(geo, ppa_addr); - - pblk_debug(pblk, "erase failed: line:%d, pos:%d\n", line->id, pos); - atomic_long_inc(&pblk->erase_failed); - - atomic_dec(&line->blk_in_line); - if (test_and_set_bit(pos, line->blk_bitmap)) - pblk_err(pblk, "attempted to erase bb: line:%d, pos:%d\n", - line->id, pos); - - /* Not necessary to mark bad blocks on 2.0 spec. 
*/ - if (geo->version == NVM_OCSSD_SPEC_20) - return; - - ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); - if (!ppa) - return; - - *ppa = ppa_addr; - pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, - GFP_ATOMIC, pblk->bb_wq); -} - -static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_chk_meta *chunk; - struct pblk_line *line; - int pos; - - line = pblk_ppa_to_line(pblk, rqd->ppa_addr); - pos = pblk_ppa_to_pos(geo, rqd->ppa_addr); - chunk = &line->chks[pos]; - - atomic_dec(&line->left_seblks); - - if (rqd->error) { - trace_pblk_chunk_reset(pblk_disk_name(pblk), - &rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED); - - chunk->state = NVM_CHK_ST_OFFLINE; - pblk_mark_bb(pblk, line, rqd->ppa_addr); - } else { - trace_pblk_chunk_reset(pblk_disk_name(pblk), - &rqd->ppa_addr, PBLK_CHUNK_RESET_DONE); - - chunk->state = NVM_CHK_ST_FREE; - } - - trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr, - chunk->state); - - atomic_dec(&pblk->inflight_io); -} - -/* Erase completion assumes that only one block is erased at a time */ -static void pblk_end_io_erase(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - - __pblk_end_io_erase(pblk, rqd); - mempool_free(rqd, &pblk->e_rq_pool); -} - -/* - * Get information for all chunks from the device. - * - * The caller is responsible for freeing (vmalloc) the returned structure - */ -struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_chk_meta *meta; - struct ppa_addr ppa; - unsigned long len; - int ret; - - ppa.ppa = 0; - - len = geo->all_chunks * sizeof(*meta); - meta = vzalloc(len); - if (!meta) - return ERR_PTR(-ENOMEM); - - ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta); - if (ret) { - vfree(meta); - return ERR_PTR(-EIO); - } - - return meta; -} - -struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, - struct nvm_chk_meta *meta, - struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun; - int lun_off = ppa.m.pu * geo->num_chk; - int chk_off = ppa.m.chk; - - return meta + ch_off + lun_off + chk_off; -} - -void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, - u64 paddr) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list = NULL; - - /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P - * table is modified with reclaimed sectors, a check is done to ensure - * that newer updates are not overwritten. 
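pblk_chunk_get_off() above flattens the (group, parallel unit, chunk) coordinates of a ppa into an index into the flat array returned by pblk_get_chunk_meta(), ordered group-major. A worked example with a hypothetical geometry of 2 groups, 4 PUs per group and 100 chunks per PU:

/* Hypothetical geometry: 2 groups x 4 PUs x 100 chunks = 800 entries. */
enum { NUM_GRP = 2, NUM_LUN = 4, NUM_CHK = 100 };

static int chunk_index(int grp, int pu, int chk)
{
	return grp * NUM_CHK * NUM_LUN	/* ch_off: whole groups skipped  */
	     + pu * NUM_CHK		/* lun_off: whole PUs skipped    */
	     + chk;			/* chk_off: offset within the PU */
}

/* chunk_index(1, 2, 5) == 1*400 + 2*100 + 5 == 605 */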
- */ - spin_lock(&line->lock); - WARN_ON(line->state == PBLK_LINESTATE_FREE); - - if (test_and_set_bit(paddr, line->invalid_bitmap)) { - WARN_ONCE(1, "pblk: double invalidate\n"); - spin_unlock(&line->lock); - return; - } - le32_add_cpu(line->vsc, -1); - - if (line->state == PBLK_LINESTATE_CLOSED) - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - - if (move_list) { - spin_lock(&l_mg->gc_lock); - spin_lock(&line->lock); - /* Prevent moving a line that has just been chosen for GC */ - if (line->state == PBLK_LINESTATE_GC) { - spin_unlock(&line->lock); - spin_unlock(&l_mg->gc_lock); - return; - } - spin_unlock(&line->lock); - - list_move_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); - } -} - -void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) -{ - struct pblk_line *line; - u64 paddr; - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a device address */ - BUG_ON(pblk_addr_in_cache(ppa)); - BUG_ON(pblk_ppa_empty(ppa)); -#endif - - line = pblk_ppa_to_line(pblk, ppa); - paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); - - __pblk_map_invalidate(pblk, line, paddr); -} - -static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, - unsigned int nr_secs) -{ - sector_t lba; - - spin_lock(&pblk->trans_lock); - for (lba = slba; lba < slba + nr_secs; lba++) { - struct ppa_addr ppa; - - ppa = pblk_trans_map_get(pblk, lba); - - if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa)) - pblk_map_invalidate(pblk, ppa); - - pblk_ppa_set_empty(&ppa); - pblk_trans_map_set(pblk, lba, ppa); - } - spin_unlock(&pblk->trans_lock); -} - -int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_meta_list); - if (!rqd->meta_list) - return -ENOMEM; - - if (rqd->nr_ppas == 1) - return 0; - - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk); - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk); - - return 0; -} - -void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - if (rqd->meta_list) - nvm_dev_dma_free(dev->parent, rqd->meta_list, - rqd->dma_meta_list); -} - -/* Caller must guarantee that the request is a valid type */ -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) -{ - mempool_t *pool; - struct nvm_rq *rqd; - int rq_size; - - switch (type) { - case PBLK_WRITE: - case PBLK_WRITE_INT: - pool = &pblk->w_rq_pool; - rq_size = pblk_w_rq_size; - break; - case PBLK_READ: - pool = &pblk->r_rq_pool; - rq_size = pblk_g_rq_size; - break; - default: - pool = &pblk->e_rq_pool; - rq_size = pblk_g_rq_size; - } - - rqd = mempool_alloc(pool, GFP_KERNEL); - memset(rqd, 0, rq_size); - - return rqd; -} - -/* Typically used on completion path. 
Cannot guarantee request consistency */ -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) -{ - mempool_t *pool; - - switch (type) { - case PBLK_WRITE: - kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap); - fallthrough; - case PBLK_WRITE_INT: - pool = &pblk->w_rq_pool; - break; - case PBLK_READ: - pool = &pblk->r_rq_pool; - break; - case PBLK_ERASE: - pool = &pblk->e_rq_pool; - break; - default: - pblk_err(pblk, "trying to free unknown rqd type\n"); - return; - } - - pblk_free_rqd_meta(pblk, rqd); - mempool_free(rqd, pool); -} - -void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, - int nr_pages) -{ - struct bio_vec *bv; - struct page *page; - int i, e, nbv = 0; - - for (i = 0; i < bio->bi_vcnt; i++) { - bv = &bio->bi_io_vec[i]; - page = bv->bv_page; - for (e = 0; e < bv->bv_len; e += PBLK_EXPOSED_PAGE_SIZE, nbv++) - if (nbv >= off) - mempool_free(page++, &pblk->page_bio_pool); - } -} - -int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, - int nr_pages) -{ - struct request_queue *q = pblk->dev->q; - struct page *page; - int i, ret; - - for (i = 0; i < nr_pages; i++) { - page = mempool_alloc(&pblk->page_bio_pool, flags); - - ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); - if (ret != PBLK_EXPOSED_PAGE_SIZE) { - pblk_err(pblk, "could not add page to bio\n"); - mempool_free(page, &pblk->page_bio_pool); - goto err; - } - } - - return 0; -err: - pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i); - return -1; -} - -void pblk_write_kick(struct pblk *pblk) -{ - wake_up_process(pblk->writer_ts); - mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000)); -} - -void pblk_write_timer_fn(struct timer_list *t) -{ - struct pblk *pblk = from_timer(pblk, t, wtimer); - - /* kick the write thread every tick to flush outstanding data */ - pblk_write_kick(pblk); -} - -void pblk_write_should_kick(struct pblk *pblk) -{ - unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb); - - if (secs_avail >= pblk->min_write_pgs_data) - pblk_write_kick(pblk); -} - -static void pblk_wait_for_meta(struct pblk *pblk) -{ - do { - if (!atomic_read(&pblk->inflight_io)) - break; - - schedule(); - } while (1); -} - -static void pblk_flush_writer(struct pblk *pblk) -{ - pblk_rb_flush(&pblk->rwb); - do { - if (!pblk_rb_sync_count(&pblk->rwb)) - break; - - pblk_write_kick(pblk); - schedule(); - } while (1); -} - -struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list = NULL; - int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data) - * (pblk->min_write_pgs - pblk->min_write_pgs_data); - int vsc = le32_to_cpu(*line->vsc) + packed_meta; - - lockdep_assert_held(&line->lock); - - if (line->w_err_gc->has_write_err) { - if (line->gc_group != PBLK_LINEGC_WERR) { - line->gc_group = PBLK_LINEGC_WERR; - move_list = &l_mg->gc_werr_list; - pblk_rl_werr_line_in(&pblk->rl); - } - } else if (!vsc) { - if (line->gc_group != PBLK_LINEGC_FULL) { - line->gc_group = PBLK_LINEGC_FULL; - move_list = &l_mg->gc_full_list; - } - } else if (vsc < lm->high_thrs) { - if (line->gc_group != PBLK_LINEGC_HIGH) { - line->gc_group = PBLK_LINEGC_HIGH; - move_list = &l_mg->gc_high_list; - } - } else if (vsc < lm->mid_thrs) { - if (line->gc_group != PBLK_LINEGC_MID) { - line->gc_group = PBLK_LINEGC_MID; - move_list = &l_mg->gc_mid_list; - } - } else if (vsc < line->sec_in_line) { - if (line->gc_group != PBLK_LINEGC_LOW) { - 
line->gc_group = PBLK_LINEGC_LOW; - move_list = &l_mg->gc_low_list; - } - } else if (vsc == line->sec_in_line) { - if (line->gc_group != PBLK_LINEGC_EMPTY) { - line->gc_group = PBLK_LINEGC_EMPTY; - move_list = &l_mg->gc_empty_list; - } - } else { - line->state = PBLK_LINESTATE_CORRUPT; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - line->gc_group = PBLK_LINEGC_NONE; - move_list = &l_mg->corrupt_list; - pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", - line->id, vsc, - line->sec_in_line, - lm->high_thrs, lm->mid_thrs); - } - - return move_list; -} - -void pblk_discard(struct pblk *pblk, struct bio *bio) -{ - sector_t slba = pblk_get_lba(bio); - sector_t nr_secs = pblk_get_secs(bio); - - pblk_invalidate_range(pblk, slba, nr_secs); -} - -void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd) -{ - atomic_long_inc(&pblk->write_failed); -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_print_failed_rqd(pblk, rqd, rqd->error); -#endif -} - -void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd) -{ - /* Empty page read is not necessarily an error (e.g., L2P recovery) */ - if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { - atomic_long_inc(&pblk->read_empty); - return; - } - - switch (rqd->error) { - case NVM_RSP_WARN_HIGHECC: - atomic_long_inc(&pblk->read_high_ecc); - break; - case NVM_RSP_ERR_FAILECC: - case NVM_RSP_ERR_FAILCRC: - atomic_long_inc(&pblk->read_failed); - break; - default: - pblk_err(pblk, "unknown read error:%d\n", rqd->error); - } -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_print_failed_rqd(pblk, rqd, rqd->error); -#endif -} - -void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write) -{ - pblk->sec_per_write = sec_per_write; -} - -int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - atomic_inc(&pblk->inflight_io); - -#ifdef CONFIG_NVM_PBLK_DEBUG - if (pblk_check_io(pblk, rqd)) - return NVM_IO_ERR; -#endif - - return nvm_submit_io(dev, rqd, buf); -} - -void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - int i; - - for (i = 0; i < rqd->nr_ppas; i++) { - struct ppa_addr *ppa = &ppa_list[i]; - struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa); - u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa); - - if (caddr == 0) - trace_pblk_chunk_state(pblk_disk_name(pblk), - ppa, NVM_CHK_ST_OPEN); - else if (caddr == (chunk->cnlb - 1)) - trace_pblk_chunk_state(pblk_disk_name(pblk), - ppa, NVM_CHK_ST_CLOSED); - } -} - -int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf) -{ - struct nvm_tgt_dev *dev = pblk->dev; - int ret; - - atomic_inc(&pblk->inflight_io); - -#ifdef CONFIG_NVM_PBLK_DEBUG - if (pblk_check_io(pblk, rqd)) - return NVM_IO_ERR; -#endif - - ret = nvm_submit_io_sync(dev, rqd, buf); - - if (trace_pblk_chunk_state_enabled() && !ret && - rqd->opcode == NVM_OP_PWRITE) - pblk_check_chunk_state_update(pblk, rqd); - - return ret; -} - -static int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd, - void *buf) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - int ret; - - pblk_down_chunk(pblk, ppa_list[0]); - ret = pblk_submit_io_sync(pblk, rqd, buf); - pblk_up_chunk(pblk, ppa_list[0]); - - return ret; -} - -int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush, bool skip_meta) -{ - int max = pblk->sec_per_write; - int min = pblk->min_write_pgs; - int secs_to_sync = 0; - - if (skip_meta && 
pblk->min_write_pgs_data != pblk->min_write_pgs) - min = max = pblk->min_write_pgs_data; - - if (secs_avail >= max) - secs_to_sync = max; - else if (secs_avail >= min) - secs_to_sync = min * (secs_avail / min); - else if (secs_to_flush) - secs_to_sync = min; - - return secs_to_sync; -} - -void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -{ - u64 addr; - int i; - - spin_lock(&line->lock); - addr = find_next_zero_bit(line->map_bitmap, - pblk->lm.sec_per_line, line->cur_sec); - line->cur_sec = addr - nr_secs; - - for (i = 0; i < nr_secs; i++, line->cur_sec--) - WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap)); - spin_unlock(&line->lock); -} - -u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -{ - u64 addr; - int i; - - lockdep_assert_held(&line->lock); - - /* logic error: ppa out-of-bounds. Prevent generating bad address */ - if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) { - WARN(1, "pblk: page allocation out of bounds\n"); - nr_secs = pblk->lm.sec_per_line - line->cur_sec; - } - - line->cur_sec = addr = find_next_zero_bit(line->map_bitmap, - pblk->lm.sec_per_line, line->cur_sec); - for (i = 0; i < nr_secs; i++, line->cur_sec++) - WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap)); - - return addr; -} - -u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) -{ - u64 addr; - - /* Lock needed in case a write fails and a recovery needs to remap - * failed write buffer entries - */ - spin_lock(&line->lock); - addr = __pblk_alloc_page(pblk, line, nr_secs); - line->left_msecs -= nr_secs; - WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n"); - spin_unlock(&line->lock); - - return addr; -} - -u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line) -{ - u64 paddr; - - spin_lock(&line->lock); - paddr = find_next_zero_bit(line->map_bitmap, - pblk->lm.sec_per_line, line->cur_sec); - spin_unlock(&line->lock); - - return paddr; -} - -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int bit; - - /* This usually only happens on bad lines */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (bit >= lm->blk_per_line) - return -1; - - return bit * geo->ws_opt; -} - -int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct ppa_addr *ppa_list; - struct nvm_rq rqd; - u64 paddr = pblk_line_smeta_start(pblk, line); - int i, ret; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = pblk_alloc_rqd_meta(pblk, &rqd); - if (ret) - return ret; - - rqd.opcode = NVM_OP_PREAD; - rqd.nr_ppas = lm->smeta_sec; - rqd.is_seq = 1; - ppa_list = nvm_rq_to_ppa_list(&rqd); - - for (i = 0; i < lm->smeta_sec; i++, paddr++) - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - ret = pblk_submit_io_sync(pblk, &rqd, line->smeta); - if (ret) { - pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); - goto clear_rqd; - } - - atomic_dec(&pblk->inflight_io); - - if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { - pblk_log_read_err(pblk, &rqd); - ret = -EIO; - } - -clear_rqd: - pblk_free_rqd_meta(pblk, &rqd); - return ret; -} - -static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, - u64 paddr) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct ppa_addr *ppa_list; - struct nvm_rq rqd; - __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); - __le64 
addr_empty = cpu_to_le64(ADDR_EMPTY); - int i, ret; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = pblk_alloc_rqd_meta(pblk, &rqd); - if (ret) - return ret; - - rqd.opcode = NVM_OP_PWRITE; - rqd.nr_ppas = lm->smeta_sec; - rqd.is_seq = 1; - ppa_list = nvm_rq_to_ppa_list(&rqd); - - for (i = 0; i < lm->smeta_sec; i++, paddr++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, - rqd.meta_list, i); - - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - meta->lba = lba_list[paddr] = addr_empty; - } - - ret = pblk_submit_io_sync_sem(pblk, &rqd, line->smeta); - if (ret) { - pblk_err(pblk, "smeta I/O submission failed: %d\n", ret); - goto clear_rqd; - } - - atomic_dec(&pblk->inflight_io); - - if (rqd.error) { - pblk_log_write_err(pblk, &rqd); - ret = -EIO; - } - -clear_rqd: - pblk_free_rqd_meta(pblk, &rqd); - return ret; -} - -int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - void *ppa_list_buf, *meta_list; - struct ppa_addr *ppa_list; - struct nvm_rq rqd; - u64 paddr = line->emeta_ssec; - dma_addr_t dma_ppa_list, dma_meta_list; - int min = pblk->min_write_pgs; - int left_ppas = lm->emeta_sec[0]; - int line_id = line->id; - int rq_ppas, rq_len; - int i, j; - int ret; - - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &dma_meta_list); - if (!meta_list) - return -ENOMEM; - - ppa_list_buf = meta_list + pblk_dma_meta_size(pblk); - dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); - -next_rq: - memset(&rqd, 0, sizeof(struct nvm_rq)); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); - rq_len = rq_ppas * geo->csecs; - - rqd.meta_list = meta_list; - rqd.ppa_list = ppa_list_buf; - rqd.dma_meta_list = dma_meta_list; - rqd.dma_ppa_list = dma_ppa_list; - rqd.opcode = NVM_OP_PREAD; - rqd.nr_ppas = rq_ppas; - ppa_list = nvm_rq_to_ppa_list(&rqd); - - for (i = 0; i < rqd.nr_ppas; ) { - struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); - int pos = pblk_ppa_to_pos(geo, ppa); - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd.is_seq = 1; - - while (test_bit(pos, line->blk_bitmap)) { - paddr += min; - if (pblk_boundary_paddr_checks(pblk, paddr)) { - ret = -EINTR; - goto free_rqd_dma; - } - - ppa = addr_to_gen_ppa(pblk, paddr, line_id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - if (pblk_boundary_paddr_checks(pblk, paddr + min)) { - ret = -EINTR; - goto free_rqd_dma; - } - - for (j = 0; j < min; j++, i++, paddr++) - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); - } - - ret = pblk_submit_io_sync(pblk, &rqd, emeta_buf); - if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); - goto free_rqd_dma; - } - - atomic_dec(&pblk->inflight_io); - - if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) { - pblk_log_read_err(pblk, &rqd); - ret = -EIO; - goto free_rqd_dma; - } - - emeta_buf += rq_len; - left_ppas -= rq_ppas; - if (left_ppas) - goto next_rq; - -free_rqd_dma: - nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - return ret; -} - -static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct ppa_addr ppa) -{ - rqd->opcode = NVM_OP_ERASE; - rqd->ppa_addr = ppa; - rqd->nr_ppas = 1; - rqd->is_seq = 1; - rqd->bio = NULL; -} - -static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_rq rqd = {NULL}; - int ret; - - trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa, - PBLK_CHUNK_RESET_START); - - pblk_setup_e_rq(pblk, &rqd, ppa); - - /* The write 
thread schedules erases so that it minimizes disturbances - * with writes. Thus, there is no need to take the LUN semaphore. - */ - ret = pblk_submit_io_sync(pblk, &rqd, NULL); - rqd.private = pblk; - __pblk_end_io_erase(pblk, &rqd); - - return ret; -} - -int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct ppa_addr ppa; - int ret, bit = -1; - - /* Erase only good blocks, one at a time */ - do { - spin_lock(&line->lock); - bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line, - bit + 1); - if (bit >= lm->blk_per_line) { - spin_unlock(&line->lock); - break; - } - - ppa = pblk->luns[bit].bppa; /* set ch and lun */ - ppa.a.blk = line->id; - - atomic_dec(&line->left_eblks); - WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); - spin_unlock(&line->lock); - - ret = pblk_blk_erase_sync(pblk, ppa); - if (ret) { - pblk_err(pblk, "failed to erase line %d\n", line->id); - return ret; - } - } while (1); - - return 0; -} - -static void pblk_line_setup_metadata(struct pblk_line *line, - struct pblk_line_mgmt *l_mg, - struct pblk_line_meta *lm) -{ - int meta_line; - - lockdep_assert_held(&l_mg->free_lock); - -retry_meta: - meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); - if (meta_line == PBLK_DATA_LINES) { - spin_unlock(&l_mg->free_lock); - io_schedule(); - spin_lock(&l_mg->free_lock); - goto retry_meta; - } - - set_bit(meta_line, &l_mg->meta_bitmap); - line->meta_line = meta_line; - - line->smeta = l_mg->sline_meta[meta_line]; - line->emeta = l_mg->eline_meta[meta_line]; - - memset(line->smeta, 0, lm->smeta_len); - memset(line->emeta->buf, 0, lm->emeta_len[0]); - - line->emeta->mem = 0; - atomic_set(&line->emeta->sync, 0); -} - -/* For now lines are always assumed full lines. Thus, smeta former and current - * lun bitmaps are omitted. 
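pblk_line_erase() above erases one good block per loop iteration: it claims the next clear bit of erase_bitmap under the line lock, then drops the lock for the slow synchronous erase. The same lock-scope pattern as a compilable userspace sketch; names are hypothetical, a pthread mutex stands in for the spinlock and a callback for pblk_blk_erase_sync():

#include <pthread.h>

#define BLKS_PER_LINE 64

struct mini_line {
	pthread_mutex_t lock;
	unsigned long long erase_bitmap; /* bit set = bad, erased or claimed */
};

static int next_zero_bit(unsigned long long map, int from)
{
	for (int b = from; b < BLKS_PER_LINE; b++)
		if (!(map & (1ULL << b)))
			return b;
	return BLKS_PER_LINE;
}

/* Erase good blocks one at a time; the lock only covers the claim. */
static int erase_line(struct mini_line *line, int (*erase_blk)(int))
{
	int bit = -1;

	for (;;) {
		pthread_mutex_lock(&line->lock);
		bit = next_zero_bit(line->erase_bitmap, bit + 1);
		if (bit >= BLKS_PER_LINE) {
			pthread_mutex_unlock(&line->lock);
			return 0;	/* every good block erased */
		}
		line->erase_bitmap |= 1ULL << bit; /* claim, like test_and_set_bit() */
		pthread_mutex_unlock(&line->lock);

		if (erase_blk(bit))	/* slow I/O with the lock dropped */
			return -1;
	}
}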
- */ -static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, - struct pblk_line *cur) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_emeta *emeta = line->emeta; - struct line_emeta *emeta_buf = emeta->buf; - struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta; - int nr_blk_line; - - /* After erasing the line, new bad blocks might appear and we risk - * having an invalid line - */ - nr_blk_line = lm->blk_per_line - - bitmap_weight(line->blk_bitmap, lm->blk_per_line); - if (nr_blk_line < lm->min_blk_line) { - spin_lock(&l_mg->free_lock); - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_BAD; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_add_tail(&line->list, &l_mg->bad_list); - spin_unlock(&l_mg->free_lock); - - pblk_debug(pblk, "line %d is bad\n", line->id); - - return 0; - } - - /* Run-time metadata */ - line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta); - - /* Mark LUNs allocated in this line (all for now) */ - bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len); - - smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); - export_guid(smeta_buf->header.uuid, &pblk->instance_uuid); - smeta_buf->header.id = cpu_to_le32(line->id); - smeta_buf->header.type = cpu_to_le16(line->type); - smeta_buf->header.version_major = SMETA_VERSION_MAJOR; - smeta_buf->header.version_minor = SMETA_VERSION_MINOR; - - /* Start metadata */ - smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); - smeta_buf->window_wr_lun = cpu_to_le32(geo->all_luns); - - /* Fill metadata among lines */ - if (cur) { - memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len); - smeta_buf->prev_id = cpu_to_le32(cur->id); - cur->emeta->buf->next_id = cpu_to_le32(line->id); - } else { - smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY); - } - - /* All smeta must be set at this point */ - smeta_buf->header.crc = cpu_to_le32( - pblk_calc_meta_header_crc(pblk, &smeta_buf->header)); - smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf)); - - /* End metadata */ - memcpy(&emeta_buf->header, &smeta_buf->header, - sizeof(struct line_header)); - - emeta_buf->header.version_major = EMETA_VERSION_MAJOR; - emeta_buf->header.version_minor = EMETA_VERSION_MINOR; - emeta_buf->header.crc = cpu_to_le32( - pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); - - emeta_buf->seq_nr = cpu_to_le64(line->seq_nr); - emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line); - emeta_buf->nr_valid_lbas = cpu_to_le64(0); - emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY); - emeta_buf->crc = cpu_to_le32(0); - emeta_buf->prev_id = smeta_buf->prev_id; - - return 1; -} - -static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); - if (!line->map_bitmap) - return -ENOMEM; - - memset(line->map_bitmap, 0, lm->sec_bitmap_len); - - /* will be initialized using bb info from map_bitmap */ - line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL); - if (!line->invalid_bitmap) { - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - return -ENOMEM; - } - - return 0; -} - -/* For now lines are always assumed full lines. Thus, smeta former and current - * lun bitmaps are omitted. 
- */ -static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, - int init) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - u64 off; - int bit = -1; - int emeta_secs; - - line->sec_in_line = lm->sec_per_line; - - /* Capture bad block information on line mapping bitmaps */ - while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line, - bit + 1)) < lm->blk_per_line) { - off = bit * geo->ws_opt; - bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off, - lm->sec_per_line); - bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux, - lm->sec_per_line); - line->sec_in_line -= geo->clba; - } - - /* Mark smeta metadata sectors as bad sectors */ - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - off = bit * geo->ws_opt; - bitmap_set(line->map_bitmap, off, lm->smeta_sec); - line->sec_in_line -= lm->smeta_sec; - line->cur_sec = off + lm->smeta_sec; - - if (init && pblk_line_smeta_write(pblk, line, off)) { - pblk_debug(pblk, "line smeta I/O failed. Retry\n"); - return 0; - } - - bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); - - /* Mark emeta metadata sectors as bad sectors. We need to consider bad - * blocks to make sure that there are enough sectors to store emeta - */ - emeta_secs = lm->emeta_sec[0]; - off = lm->sec_per_line; - while (emeta_secs) { - off -= geo->ws_opt; - if (!test_bit(off, line->invalid_bitmap)) { - bitmap_set(line->invalid_bitmap, off, geo->ws_opt); - emeta_secs -= geo->ws_opt; - } - } - - line->emeta_ssec = off; - line->sec_in_line -= lm->emeta_sec[0]; - line->nr_valid_lbas = 0; - line->left_msecs = line->sec_in_line; - *line->vsc = cpu_to_le32(line->sec_in_line); - - if (lm->sec_per_line - line->sec_in_line != - bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_BAD; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_add_tail(&line->list, &l_mg->bad_list); - pblk_err(pblk, "unexpected line %d is bad\n", line->id); - - return 0; - } - - return 1; -} - -static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int blk_to_erase = atomic_read(&line->blk_in_line); - int i; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - int state = line->chks[pos].state; - - /* Free chunks should not be erased */ - if (state & NVM_CHK_ST_FREE) { - set_bit(pblk_ppa_to_pos(geo, rlun->bppa), - line->erase_bitmap); - blk_to_erase--; - } - } - - return blk_to_erase; -} - -static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int blk_in_line = atomic_read(&line->blk_in_line); - int blk_to_erase; - - /* Bad blocks do not need to be erased */ - bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); - - spin_lock(&line->lock); - - /* If we have not written to this line, we need to mark up free chunks - * as already erased - */ - if (line->state == PBLK_LINESTATE_NEW) { - blk_to_erase = pblk_prepare_new_line(pblk, line); - line->state = PBLK_LINESTATE_FREE; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - } else { - blk_to_erase = blk_in_line; - } - - if (blk_in_line < lm->min_blk_line) { - 
spin_unlock(&line->lock); - return -EAGAIN; - } - - if (line->state != PBLK_LINESTATE_FREE) { - WARN(1, "pblk: corrupted line %d, state %d\n", - line->id, line->state); - spin_unlock(&line->lock); - return -EINTR; - } - - line->state = PBLK_LINESTATE_OPEN; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - atomic_set(&line->left_eblks, blk_to_erase); - atomic_set(&line->left_seblks, blk_to_erase); - - line->meta_distance = lm->meta_distance; - spin_unlock(&line->lock); - - kref_init(&line->ref); - atomic_set(&line->sec_to_update, 0); - - return 0; -} - -/* Line allocations in the recovery path are always single threaded */ -int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int ret; - - spin_lock(&l_mg->free_lock); - l_mg->data_line = line; - list_del(&line->list); - - ret = pblk_line_prepare(pblk, line); - if (ret) { - list_add(&line->list, &l_mg->free_list); - spin_unlock(&l_mg->free_lock); - return ret; - } - spin_unlock(&l_mg->free_lock); - - ret = pblk_line_alloc_bitmaps(pblk, line); - if (ret) - goto fail; - - if (!pblk_line_init_bb(pblk, line, 0)) { - ret = -EINTR; - goto fail; - } - - pblk_rl_free_lines_dec(&pblk->rl, line, true); - return 0; - -fail: - spin_lock(&l_mg->free_lock); - list_add(&line->list, &l_mg->free_list); - spin_unlock(&l_mg->free_lock); - - return ret; -} - -void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; -} - -static void pblk_line_reinit(struct pblk_line *line) -{ - *line->vsc = cpu_to_le32(EMPTY_ENTRY); - - line->map_bitmap = NULL; - line->invalid_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; -} - -void pblk_line_free(struct pblk_line *line) -{ - struct pblk *pblk = line->pblk; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - mempool_free(line->invalid_bitmap, l_mg->bitmap_pool); - - pblk_line_reinit(line); -} - -struct pblk_line *pblk_line_get(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line; - int ret, bit; - - lockdep_assert_held(&l_mg->free_lock); - -retry: - if (list_empty(&l_mg->free_list)) { - pblk_err(pblk, "no free lines\n"); - return NULL; - } - - line = list_first_entry(&l_mg->free_list, struct pblk_line, list); - list_del(&line->list); - l_mg->nr_free_lines--; - - bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (unlikely(bit >= lm->blk_per_line)) { - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_BAD; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_add_tail(&line->list, &l_mg->bad_list); - - pblk_debug(pblk, "line %d is bad\n", line->id); - goto retry; - } - - ret = pblk_line_prepare(pblk, line); - if (ret) { - switch (ret) { - case -EAGAIN: - list_add(&line->list, &l_mg->bad_list); - goto retry; - case -EINTR: - list_add(&line->list, &l_mg->corrupt_list); - goto retry; - default: - pblk_err(pblk, "failed to prepare line %d\n", line->id); - list_add(&line->list, &l_mg->free_list); - l_mg->nr_free_lines++; - return NULL; - } - } - - return line; -} - -static struct pblk_line *pblk_line_retry(struct pblk *pblk, - struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *retry_line; - -retry: - 
spin_lock(&l_mg->free_lock); - retry_line = pblk_line_get(pblk); - if (!retry_line) { - l_mg->data_line = NULL; - spin_unlock(&l_mg->free_lock); - return NULL; - } - - retry_line->map_bitmap = line->map_bitmap; - retry_line->invalid_bitmap = line->invalid_bitmap; - retry_line->smeta = line->smeta; - retry_line->emeta = line->emeta; - retry_line->meta_line = line->meta_line; - - pblk_line_reinit(line); - - l_mg->data_line = retry_line; - spin_unlock(&l_mg->free_lock); - - pblk_rl_free_lines_dec(&pblk->rl, line, false); - - if (pblk_line_erase(pblk, retry_line)) - goto retry; - - return retry_line; -} - -static void pblk_set_space_limit(struct pblk *pblk) -{ - struct pblk_rl *rl = &pblk->rl; - - atomic_set(&rl->rb_space, 0); -} - -struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - - spin_lock(&l_mg->free_lock); - line = pblk_line_get(pblk); - if (!line) { - spin_unlock(&l_mg->free_lock); - return NULL; - } - - line->seq_nr = l_mg->d_seq_nr++; - line->type = PBLK_LINETYPE_DATA; - l_mg->data_line = line; - - pblk_line_setup_metadata(line, l_mg, &pblk->lm); - - /* Allocate next line for preparation */ - l_mg->data_next = pblk_line_get(pblk); - if (!l_mg->data_next) { - /* If we cannot get a new line, we need to stop the pipeline. - * Only allow as many writes in as we can store safely and then - * fail gracefully - */ - pblk_set_space_limit(pblk); - - l_mg->data_next = NULL; - } else { - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; - l_mg->data_next->type = PBLK_LINETYPE_DATA; - } - spin_unlock(&l_mg->free_lock); - - if (pblk_line_alloc_bitmaps(pblk, line)) - return NULL; - - if (pblk_line_erase(pblk, line)) { - line = pblk_line_retry(pblk, line); - if (!line) - return NULL; - } - -retry_setup: - if (!pblk_line_init_metadata(pblk, line, NULL)) { - line = pblk_line_retry(pblk, line); - if (!line) - return NULL; - - goto retry_setup; - } - - if (!pblk_line_init_bb(pblk, line, 1)) { - line = pblk_line_retry(pblk, line); - if (!line) - return NULL; - - goto retry_setup; - } - - pblk_rl_free_lines_dec(&pblk->rl, line, true); - - return line; -} - -void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa) -{ - struct pblk_line *line; - - line = pblk_ppa_to_line(pblk, ppa); - kref_put(&line->ref, pblk_line_put_wq); -} - -void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - int i; - - for (i = 0; i < rqd->nr_ppas; i++) - pblk_ppa_to_line_put(pblk, ppa_list[i]); -} - -static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) -{ - lockdep_assert_held(&pblk->l_mg.free_lock); - - pblk_set_space_limit(pblk); - pblk->state = PBLK_STATE_STOPPING; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); -} - -static void pblk_line_close_meta_sync(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line, *tline; - LIST_HEAD(list); - - spin_lock(&l_mg->close_lock); - if (list_empty(&l_mg->emeta_list)) { - spin_unlock(&l_mg->close_lock); - return; - } - - list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); - spin_unlock(&l_mg->close_lock); - - list_for_each_entry_safe(line, tline, &list, list) { - struct pblk_emeta *emeta = line->emeta; - - while (emeta->mem < lm->emeta_len[0]) { - int ret; - - ret = pblk_submit_meta_io(pblk, line); - if (ret) { - pblk_err(pblk, "sync meta line %d failed (%d)\n", - line->id, ret); - return; - } - } - } - - 
pblk_wait_for_meta(pblk); - flush_workqueue(pblk->close_wq); -} - -void __pblk_pipeline_flush(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int ret; - - spin_lock(&l_mg->free_lock); - if (pblk->state == PBLK_STATE_RECOVERING || - pblk->state == PBLK_STATE_STOPPED) { - spin_unlock(&l_mg->free_lock); - return; - } - pblk->state = PBLK_STATE_RECOVERING; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); - spin_unlock(&l_mg->free_lock); - - pblk_flush_writer(pblk); - pblk_wait_for_meta(pblk); - - ret = pblk_recov_pad(pblk); - if (ret) { - pblk_err(pblk, "could not close data on teardown(%d)\n", ret); - return; - } - - flush_workqueue(pblk->bb_wq); - pblk_line_close_meta_sync(pblk); -} - -void __pblk_pipeline_stop(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - - spin_lock(&l_mg->free_lock); - pblk->state = PBLK_STATE_STOPPED; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); - l_mg->data_line = NULL; - l_mg->data_next = NULL; - spin_unlock(&l_mg->free_lock); -} - -void pblk_pipeline_stop(struct pblk *pblk) -{ - __pblk_pipeline_flush(pblk); - __pblk_pipeline_stop(pblk); -} - -struct pblk_line *pblk_line_replace_data(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *cur, *new = NULL; - unsigned int left_seblks; - - new = l_mg->data_next; - if (!new) - goto out; - - spin_lock(&l_mg->free_lock); - cur = l_mg->data_line; - l_mg->data_line = new; - - pblk_line_setup_metadata(new, l_mg, &pblk->lm); - spin_unlock(&l_mg->free_lock); - -retry_erase: - left_seblks = atomic_read(&new->left_seblks); - if (left_seblks) { - /* If line is not fully erased, erase it */ - if (atomic_read(&new->left_eblks)) { - if (pblk_line_erase(pblk, new)) - goto out; - } else { - io_schedule(); - } - goto retry_erase; - } - - if (pblk_line_alloc_bitmaps(pblk, new)) - return NULL; - -retry_setup: - if (!pblk_line_init_metadata(pblk, new, cur)) { - new = pblk_line_retry(pblk, new); - if (!new) - goto out; - - goto retry_setup; - } - - if (!pblk_line_init_bb(pblk, new, 1)) { - new = pblk_line_retry(pblk, new); - if (!new) - goto out; - - goto retry_setup; - } - - pblk_rl_free_lines_dec(&pblk->rl, new, true); - - /* Allocate next line for preparation */ - spin_lock(&l_mg->free_lock); - l_mg->data_next = pblk_line_get(pblk); - if (!l_mg->data_next) { - /* If we cannot get a new line, we need to stop the pipeline. 
- * Only allow as many writes in as we can store safely and then - * fail gracefully - */ - pblk_stop_writes(pblk, new); - l_mg->data_next = NULL; - } else { - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; - l_mg->data_next->type = PBLK_LINETYPE_DATA; - } - spin_unlock(&l_mg->free_lock); - -out: - return new; -} - -static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_GC); - if (line->w_err_gc->has_gc_err) { - spin_unlock(&line->lock); - pblk_err(pblk, "line %d had errors during GC\n", line->id); - pblk_put_line_back(pblk, line); - line->w_err_gc->has_gc_err = 0; - return; - } - - line->state = PBLK_LINESTATE_FREE; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - line->gc_group = PBLK_LINEGC_NONE; - pblk_line_free(line); - - if (line->w_err_gc->has_write_err) { - pblk_rl_werr_line_out(&pblk->rl); - line->w_err_gc->has_write_err = 0; - } - - spin_unlock(&line->lock); - atomic_dec(&gc->pipeline_gc); - - spin_lock(&l_mg->free_lock); - list_add_tail(&line->list, &l_mg->free_list); - l_mg->nr_free_lines++; - spin_unlock(&l_mg->free_lock); - - pblk_rl_free_lines_inc(&pblk->rl, line); -} - -static void pblk_line_put_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_put_ws = container_of(work, - struct pblk_line_ws, ws); - struct pblk *pblk = line_put_ws->pblk; - struct pblk_line *line = line_put_ws->line; - - __pblk_line_put(pblk, line); - mempool_free(line_put_ws, &pblk->gen_ws_pool); -} - -void pblk_line_put(struct kref *ref) -{ - struct pblk_line *line = container_of(ref, struct pblk_line, ref); - struct pblk *pblk = line->pblk; - - __pblk_line_put(pblk, line); -} - -void pblk_line_put_wq(struct kref *ref) -{ - struct pblk_line *line = container_of(ref, struct pblk_line, ref); - struct pblk *pblk = line->pblk; - struct pblk_line_ws *line_put_ws; - - line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC); - if (!line_put_ws) - return; - - line_put_ws->pblk = pblk; - line_put_ws->line = line; - line_put_ws->priv = NULL; - - INIT_WORK(&line_put_ws->ws, pblk_line_put_ws); - queue_work(pblk->r_end_wq, &line_put_ws->ws); -} - -int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_rq *rqd; - int err; - - rqd = pblk_alloc_rqd(pblk, PBLK_ERASE); - - pblk_setup_e_rq(pblk, rqd, ppa); - - rqd->end_io = pblk_end_io_erase; - rqd->private = pblk; - - trace_pblk_chunk_reset(pblk_disk_name(pblk), - &ppa, PBLK_CHUNK_RESET_START); - - /* The write thread schedules erases so that it minimizes disturbances - * with writes. Thus, there is no need to take the LUN semaphore. 
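pblk_line_put_wq() above drops a line reference from contexts that must not block, bouncing the heavyweight release (__pblk_line_put() takes several locks) to a workqueue. A userspace analogue of that defer-the-final-put pattern, with a detached thread standing in for queue_work(); names are illustrative only:

#include <pthread.h>
#include <stdatomic.h>

struct line_ref {
	atomic_int ref;				/* starts at 1, like kref_init() */
	void (*release)(struct line_ref *);	/* the expensive teardown */
};

static void *release_worker(void *arg)
{
	struct line_ref *l = arg;

	l->release(l);		/* runs outside the hot completion path */
	return NULL;
}

/* Drop a reference; defer the final release instead of doing it inline. */
static void line_put(struct line_ref *l)
{
	pthread_t t;

	if (atomic_fetch_sub(&l->ref, 1) != 1)
		return;		/* not the last reference */
	if (pthread_create(&t, NULL, release_worker, l) == 0)
		pthread_detach(t);
}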
- */ - err = pblk_submit_io(pblk, rqd, NULL); - if (err) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - pblk_err(pblk, "could not async erase line:%d,blk:%d\n", - pblk_ppa_to_line_id(ppa), - pblk_ppa_to_pos(geo, ppa)); - } - - return err; -} - -struct pblk_line *pblk_line_get_data(struct pblk *pblk) -{ - return pblk->l_mg.data_line; -} - -/* For now, always erase next line */ -struct pblk_line *pblk_line_get_erase(struct pblk *pblk) -{ - return pblk->l_mg.data_next; -} - -int pblk_line_is_full(struct pblk_line *line) -{ - return (line->left_msecs == 0); -} - -static void pblk_line_should_sync_meta(struct pblk *pblk) -{ - if (pblk_rl_is_limit(&pblk->rl)) - pblk_line_close_meta_sync(pblk); -} - -void pblk_line_close(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list; - int i; - -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line), - "pblk: corrupt closed line %d\n", line->id); -#endif - - spin_lock(&l_mg->free_lock); - WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap)); - spin_unlock(&l_mg->free_lock); - - spin_lock(&l_mg->gc_lock); - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_OPEN); - line->state = PBLK_LINESTATE_CLOSED; - move_list = pblk_line_gc_list(pblk, line); - list_add_tail(&line->list, move_list); - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - int pos = pblk_ppa_to_pos(geo, rlun->bppa); - int state = line->chks[pos].state; - - if (!(state & NVM_CHK_ST_OFFLINE)) - state = NVM_CHK_ST_CLOSED; - } - - spin_unlock(&line->lock); - spin_unlock(&l_mg->gc_lock); - - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); -} - -void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_emeta *emeta = line->emeta; - struct line_emeta *emeta_buf = emeta->buf; - struct wa_counters *wa = emeta_to_wa(lm, emeta_buf); - - /* No need for exact vsc value; avoid a big line lock and take approx. 
*/ - memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len); - memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len); - - wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa)); - wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa)); - wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa)); - - if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) { - emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC); - export_guid(emeta_buf->header.uuid, &pblk->instance_uuid); - emeta_buf->header.id = cpu_to_le32(line->id); - emeta_buf->header.type = cpu_to_le16(line->type); - emeta_buf->header.version_major = EMETA_VERSION_MAJOR; - emeta_buf->header.version_minor = EMETA_VERSION_MINOR; - emeta_buf->header.crc = cpu_to_le32( - pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); - } - - emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); - emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); - - spin_lock(&l_mg->close_lock); - spin_lock(&line->lock); - - /* Update the in-memory start address for emeta, in case it has - * shifted due to write errors - */ - if (line->emeta_ssec != line->cur_sec) - line->emeta_ssec = line->cur_sec; - - list_add_tail(&line->list, &l_mg->emeta_list); - spin_unlock(&line->lock); - spin_unlock(&l_mg->close_lock); - - pblk_line_should_sync_meta(pblk); -} - -static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - unsigned int lba_list_size = lm->emeta_len[2]; - struct pblk_w_err_gc *w_err_gc = line->w_err_gc; - struct pblk_emeta *emeta = line->emeta; - - w_err_gc->lba_list = kvmalloc(lba_list_size, GFP_KERNEL); - memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf), - lba_list_size); -} - -void pblk_line_close_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct pblk_line *line = line_ws->line; - struct pblk_w_err_gc *w_err_gc = line->w_err_gc; - - /* Write errors make the emeta start address stored in smeta invalid, - * so keep a copy of the lba list until we've gc'd the line - */ - if (w_err_gc->has_write_err) - pblk_save_lba_list(pblk, line); - - pblk_line_close(pblk, line); - mempool_free(line_ws, &pblk->gen_ws_pool); -} - -void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), gfp_t gfp_mask, - struct workqueue_struct *wq) -{ - struct pblk_line_ws *line_ws; - - line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask); - if (!line_ws) { - pblk_err(pblk, "pblk: could not allocate memory\n"); - return; - } - - line_ws->pblk = pblk; - line_ws->line = line; - line_ws->priv = priv; - - INIT_WORK(&line_ws->ws, work); - queue_work(wq, &line_ws->ws); -} - -static void __pblk_down_chunk(struct pblk *pblk, int pos) -{ - struct pblk_lun *rlun = &pblk->luns[pos]; - int ret; - - /* - * Only send one inflight I/O per LUN. 
Since we map at a page - * granularity, all ppas in the I/O will map to the same LUN - */ - - ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); - if (ret == -ETIME || ret == -EINTR) - pblk_err(pblk, "taking lun semaphore timed out: err %d\n", - -ret); -} - -void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa); - - __pblk_down_chunk(pblk, pos); -} - -void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, - unsigned long *lun_bitmap) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, ppa); - - /* If the LUN has been locked for this same request, do not attempt to - * lock it again - */ - if (test_and_set_bit(pos, lun_bitmap)) - return; - - __pblk_down_chunk(pblk, pos); -} - -void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int pos = pblk_ppa_to_pos(geo, ppa); - - rlun = &pblk->luns[pos]; - up(&rlun->wr_sem); -} - -void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int num_lun = geo->all_luns; - int bit = -1; - - while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) { - rlun = &pblk->luns[bit]; - up(&rlun->wr_sem); - } -} - -void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) -{ - struct ppa_addr ppa_l2p; - - /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - return; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - - if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)) - pblk_map_invalidate(pblk, ppa_l2p); - - pblk_trans_map_set(pblk, lba, ppa); - spin_unlock(&pblk->trans_lock); -} - -void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) -{ - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(!pblk_addr_in_cache(ppa)); - BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa))); -#endif - - pblk_update_map(pblk, lba, ppa); -} - -int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, - struct pblk_line *gc_line, u64 paddr_gc) -{ - struct ppa_addr ppa_l2p, ppa_gc; - int ret = 1; - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(!pblk_addr_in_cache(ppa_new)); - BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new))); -#endif - - /* logic error: lba out-of-bounds. 
Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - return 0; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id); - - if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) { - spin_lock(&gc_line->lock); - WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap), - "pblk: corrupted GC update"); - spin_unlock(&gc_line->lock); - - ret = 0; - goto out; - } - - pblk_trans_map_set(pblk, lba, ppa_new); -out: - spin_unlock(&pblk->trans_lock); - return ret; -} - -void pblk_update_map_dev(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache) -{ - struct ppa_addr ppa_l2p; - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a device address */ - BUG_ON(pblk_addr_in_cache(ppa_mapped)); -#endif - /* Invalidate and discard padded entries */ - if (lba == ADDR_EMPTY) { - atomic64_inc(&pblk->pad_wa); -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->padded_wb); -#endif - if (!pblk_ppa_empty(ppa_mapped)) - pblk_map_invalidate(pblk, ppa_mapped); - return; - } - - /* logic error: lba out-of-bounds. Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - return; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - - /* Do not update L2P if the cacheline has been updated. In this case, - * the mapped ppa must be invalidated - */ - if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) { - if (!pblk_ppa_empty(ppa_mapped)) - pblk_map_invalidate(pblk, ppa_mapped); - goto out; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)); -#endif - - pblk_trans_map_set(pblk, lba, ppa_mapped); -out: - spin_unlock(&pblk->trans_lock); -} - -int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, - sector_t blba, int nr_secs, bool *from_cache) -{ - int i; - - spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_secs; i++) { - struct ppa_addr ppa; - - ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i); - - /* If the L2P entry maps to a line, the reference is valid */ - if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { - struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); - - if (i > 0 && *from_cache) - break; - *from_cache = false; - - kref_get(&line->ref); - } else { - if (i > 0 && !*from_cache) - break; - *from_cache = true; - } - } - spin_unlock(&pblk->trans_lock); - return i; -} - -void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, - u64 *lba_list, int nr_secs) -{ - u64 lba; - int i; - - spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_secs; i++) { - lba = lba_list[i]; - if (lba != ADDR_EMPTY) { - /* logic error: lba out-of-bounds. 
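pblk_update_map_gc() and pblk_update_map_dev() above apply the same rule: under trans_lock the new address is installed only if the L2P entry still matches what the caller captured earlier, so a concurrent user write always beats GC or cache writeback. A minimal sketch of that compare-before-set rule, with a hypothetical flat table and a mutex standing in for trans_lock:

#include <pthread.h>
#include <stdbool.h>

#define CAPACITY 1024

static pthread_mutex_t trans_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long l2p[CAPACITY];	/* lba -> packed address */

/* Returns true if the update was applied, false if the entry moved on. */
static bool update_map_if_unchanged(unsigned long lba,
				    unsigned long expected_old,
				    unsigned long new_addr)
{
	bool applied = false;

	if (lba >= CAPACITY)		/* logic error: ignore update */
		return false;

	pthread_mutex_lock(&trans_lock);
	if (l2p[lba] == expected_old) {	/* nobody rewrote this LBA meanwhile */
		l2p[lba] = new_addr;
		applied = true;
	}
	pthread_mutex_unlock(&trans_lock);
	return applied;
}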
Ignore update */ - if (!(lba < pblk->capacity)) { - WARN(1, "pblk: corrupted L2P map request\n"); - continue; - } - ppas[i] = pblk_trans_map_get(pblk, lba); - } - } - spin_unlock(&pblk->trans_lock); -} - -void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd) -{ - void *buffer; - - if (pblk_is_oob_meta_supported(pblk)) { - /* Just use OOB metadata buffer as always */ - buffer = rqd->meta_list; - } else { - /* We need to reuse last page of request (packed metadata) - * in similar way as traditional oob metadata - */ - buffer = page_to_virt( - rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); - } - - return buffer; -} - -void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd) -{ - void *meta_list = rqd->meta_list; - void *page; - int i = 0; - - if (pblk_is_oob_meta_supported(pblk)) - return; - - page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); - /* We need to fill oob meta buffer with data from packed metadata */ - for (; i < rqd->nr_ppas; i++) - memcpy(pblk_get_meta(pblk, meta_list, i), - page + (i * sizeof(struct pblk_sec_meta)), - sizeof(struct pblk_sec_meta)); -} diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c deleted file mode 100644 index b31658be35a7..000000000000 --- a/drivers/lightnvm/pblk-gc.c +++ /dev/null @@ -1,726 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-gc.c - pblk's garbage collector - */ - -#include "pblk.h" -#include "pblk-trace.h" -#include <linux/delay.h> - - -static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) -{ - vfree(gc_rq->data); - kfree(gc_rq); -} - -static int pblk_gc_write(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - struct pblk_gc_rq *gc_rq, *tgc_rq; - LIST_HEAD(w_list); - - spin_lock(&gc->w_lock); - if (list_empty(&gc->w_list)) { - spin_unlock(&gc->w_lock); - return 1; - } - - list_cut_position(&w_list, &gc->w_list, gc->w_list.prev); - gc->w_entries = 0; - spin_unlock(&gc->w_lock); - - list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { - pblk_write_gc_to_cache(pblk, gc_rq); - list_del(&gc_rq->list); - kref_put(&gc_rq->line->ref, pblk_line_put); - pblk_gc_free_gc_rq(gc_rq); - } - - return 0; -} - -static void pblk_gc_writer_kick(struct pblk_gc *gc) -{ - wake_up_process(gc->gc_writer_ts); -} - -void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list; - - spin_lock(&l_mg->gc_lock); - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_GC); - line->state = PBLK_LINESTATE_CLOSED; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - /* We need to reset gc_group in order to ensure that - * pblk_line_gc_list will return proper move_list - * since right now current line is not on any of the - * gc lists. 
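pblk_gc_write() above drains the pending GC write list by splicing the whole list onto a local head while holding w_lock (list_cut_position()) and only then walking the entries, so the lock is held for O(1) time however long the batch is. The same drain pattern in a self-contained userspace sketch, with a hypothetical singly-linked list in place of the kernel's struct list_head:

#include <pthread.h>
#include <stddef.h>

struct gc_rq {
	struct gc_rq *next;
	/* payload elided */
};

static pthread_mutex_t w_lock = PTHREAD_MUTEX_INITIALIZER;
static struct gc_rq *w_list;		/* pending GC write requests */

static void gc_write(void (*write_one)(struct gc_rq *))
{
	struct gc_rq *batch, *next;

	/* Splice the whole pending list out under the lock... */
	pthread_mutex_lock(&w_lock);
	batch = w_list;
	w_list = NULL;
	pthread_mutex_unlock(&w_lock);

	/* ...then process it with no lock held. */
	for (; batch; batch = next) {
		next = batch->next;
		write_one(batch);
	}
}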
- */ - line->gc_group = PBLK_LINEGC_NONE; - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - list_add_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); -} - -static void pblk_gc_line_ws(struct work_struct *work) -{ - struct pblk_line_ws *gc_rq_ws = container_of(work, - struct pblk_line_ws, ws); - struct pblk *pblk = gc_rq_ws->pblk; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line = gc_rq_ws->line; - struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; - int ret; - - up(&gc->gc_sem); - - /* Read from GC victim block */ - ret = pblk_submit_read_gc(pblk, gc_rq); - if (ret) { - line->w_err_gc->has_gc_err = 1; - goto out; - } - - if (!gc_rq->secs_to_gc) - goto out; - -retry: - spin_lock(&gc->w_lock); - if (gc->w_entries >= PBLK_GC_RQ_QD) { - spin_unlock(&gc->w_lock); - pblk_gc_writer_kick(&pblk->gc); - usleep_range(128, 256); - goto retry; - } - gc->w_entries++; - list_add_tail(&gc_rq->list, &gc->w_list); - spin_unlock(&gc->w_lock); - - pblk_gc_writer_kick(&pblk->gc); - - kfree(gc_rq_ws); - return; - -out: - pblk_gc_free_gc_rq(gc_rq); - kref_put(&line->ref, pblk_line_put); - kfree(gc_rq_ws); -} - -static __le64 *get_lba_list_from_emeta(struct pblk *pblk, - struct pblk_line *line) -{ - struct line_emeta *emeta_buf; - struct pblk_line_meta *lm = &pblk->lm; - unsigned int lba_list_size = lm->emeta_len[2]; - __le64 *lba_list; - int ret; - - emeta_buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL); - if (!emeta_buf) - return NULL; - - ret = pblk_line_emeta_read(pblk, line, emeta_buf); - if (ret) { - pblk_err(pblk, "line %d read emeta failed (%d)\n", - line->id, ret); - kvfree(emeta_buf); - return NULL; - } - - /* If this read fails, it means that emeta is corrupted. - * For now, leave the line untouched. - * TODO: Implement a recovery routine that scans and moves - * all sectors on the line. 
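 * (emeta holds the line's LBA list; without it GC cannot map the
 * line's physical sectors back to logical addresses, so the data
 * cannot be relocated.)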
- */ - - ret = pblk_recov_check_emeta(pblk, emeta_buf); - if (ret) { - pblk_err(pblk, "inconsistent emeta (line %d)\n", - line->id); - kvfree(emeta_buf); - return NULL; - } - - lba_list = kvmalloc(lba_list_size, GFP_KERNEL); - - if (lba_list) - memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size); - - kvfree(emeta_buf); - - return lba_list; -} - -static void pblk_gc_line_prepare_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct pblk_line *line = line_ws->line; - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line_ws *gc_rq_ws; - struct pblk_gc_rq *gc_rq; - __le64 *lba_list; - unsigned long *invalid_bitmap; - int sec_left, nr_secs, bit; - - invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!invalid_bitmap) - goto fail_free_ws; - - if (line->w_err_gc->has_write_err) { - lba_list = line->w_err_gc->lba_list; - line->w_err_gc->lba_list = NULL; - } else { - lba_list = get_lba_list_from_emeta(pblk, line); - if (!lba_list) { - pblk_err(pblk, "could not interpret emeta (line %d)\n", - line->id); - goto fail_free_invalid_bitmap; - } - } - - spin_lock(&line->lock); - bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line); - sec_left = pblk_line_vsc(line); - spin_unlock(&line->lock); - - if (sec_left < 0) { - pblk_err(pblk, "corrupted GC line (%d)\n", line->id); - goto fail_free_lba_list; - } - - bit = -1; -next_rq: - gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL); - if (!gc_rq) - goto fail_free_lba_list; - - nr_secs = 0; - do { - bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line, - bit + 1); - if (bit > line->emeta_ssec) - break; - - gc_rq->paddr_list[nr_secs] = bit; - gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]); - } while (nr_secs < pblk->max_write_pgs); - - if (unlikely(!nr_secs)) { - kfree(gc_rq); - goto out; - } - - gc_rq->nr_secs = nr_secs; - gc_rq->line = line; - - gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs)); - if (!gc_rq->data) - goto fail_free_gc_rq; - - gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); - if (!gc_rq_ws) - goto fail_free_gc_data; - - gc_rq_ws->pblk = pblk; - gc_rq_ws->line = line; - gc_rq_ws->priv = gc_rq; - - /* The write GC path can be much slower than the read GC one due to - * the budget imposed by the rate-limiter. Balance in case that we get - * back pressure from the write GC path. - */ - while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000))) - io_schedule(); - - kref_get(&line->ref); - - INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws); - queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws); - - sec_left -= nr_secs; - if (sec_left > 0) - goto next_rq; - -out: - kvfree(lba_list); - kfree(line_ws); - kfree(invalid_bitmap); - - kref_put(&line->ref, pblk_line_put); - atomic_dec(&gc->read_inflight_gc); - - return; - -fail_free_gc_data: - vfree(gc_rq->data); -fail_free_gc_rq: - kfree(gc_rq); -fail_free_lba_list: - kvfree(lba_list); -fail_free_invalid_bitmap: - kfree(invalid_bitmap); -fail_free_ws: - kfree(line_ws); - - /* Line goes back to closed state, so we cannot release additional - * reference for line, since we do that only when we want to do - * gc to free line state transition. 
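 * Put differently: the final kref_put() that lets a line move from
 * GC to the free list only happens on the success path; on failure
 * the line is handed back to the closed lists with its base
 * reference intact.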
- */ - pblk_put_line_back(pblk, line); - atomic_dec(&gc->read_inflight_gc); - - pblk_err(pblk, "failed to GC line %d\n", line->id); -} - -static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_gc *gc = &pblk->gc; - struct pblk_line_ws *line_ws; - - pblk_debug(pblk, "line '%d' being reclaimed for GC\n", line->id); - - line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); - if (!line_ws) - return -ENOMEM; - - line_ws->pblk = pblk; - line_ws->line = line; - - atomic_inc(&gc->pipeline_gc); - INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws); - queue_work(gc->gc_reader_wq, &line_ws->ws); - - return 0; -} - -static void pblk_gc_reader_kick(struct pblk_gc *gc) -{ - wake_up_process(gc->gc_reader_ts); -} - -static void pblk_gc_kick(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - pblk_gc_writer_kick(gc); - pblk_gc_reader_kick(gc); - - /* If we're shutting down GC, let's not start it up again */ - if (gc->gc_enabled) { - wake_up_process(gc->gc_ts); - mod_timer(&gc->gc_timer, - jiffies + msecs_to_jiffies(GC_TIME_MSECS)); - } -} - -static int pblk_gc_read(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line; - - spin_lock(&gc->r_lock); - if (list_empty(&gc->r_list)) { - spin_unlock(&gc->r_lock); - return 1; - } - - line = list_first_entry(&gc->r_list, struct pblk_line, list); - list_del(&line->list); - spin_unlock(&gc->r_lock); - - pblk_gc_kick(pblk); - - if (pblk_gc_line(pblk, line)) { - pblk_err(pblk, "failed to GC line %d\n", line->id); - /* rollback */ - spin_lock(&gc->r_lock); - list_add_tail(&line->list, &gc->r_list); - spin_unlock(&gc->r_lock); - } - - return 0; -} - -static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, - struct list_head *group_list) -{ - struct pblk_line *line, *victim; - unsigned int line_vsc = ~0x0L, victim_vsc = ~0x0L; - - victim = list_first_entry(group_list, struct pblk_line, list); - - list_for_each_entry(line, group_list, list) { - if (!atomic_read(&line->sec_to_update)) - line_vsc = le32_to_cpu(*line->vsc); - if (line_vsc < victim_vsc) { - victim = line; - victim_vsc = le32_to_cpu(*victim->vsc); - } - } - - if (victim_vsc == ~0x0) - return NULL; - - return victim; -} - -static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) -{ - unsigned int nr_blocks_free, nr_blocks_need; - unsigned int werr_lines = atomic_read(&rl->werr_lines); - - nr_blocks_need = pblk_rl_high_thrs(rl); - nr_blocks_free = pblk_rl_nr_free_blks(rl); - - /* This is not critical, no need to take lock here */ - return ((werr_lines > 0) || - ((gc->gc_active) && (nr_blocks_need > nr_blocks_free))); -} - -void pblk_gc_free_full_lines(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line; - - do { - spin_lock(&l_mg->gc_lock); - if (list_empty(&l_mg->gc_full_list)) { - spin_unlock(&l_mg->gc_lock); - return; - } - - line = list_first_entry(&l_mg->gc_full_list, - struct pblk_line, list); - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_CLOSED); - line->state = PBLK_LINESTATE_GC; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_del(&line->list); - spin_unlock(&l_mg->gc_lock); - - atomic_inc(&gc->pipeline_gc); - kref_put(&line->ref, pblk_line_put); - } while (1); -} - -/* - * Lines with no valid sectors will be returned to the free list immediately. 
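 * (Reclaiming such a line costs no data movement, which is why
 * pblk_gc_free_full_lines() handles them outside the normal
 * victim-selection path below.)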
If - * GC is activated - either because the free block count is under the determined - * threshold, or because it is being forced from user space - only lines with a - * high count of invalid sectors will be recycled. - */ -static void pblk_gc_run(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line; - struct list_head *group_list; - bool run_gc; - int read_inflight_gc, gc_group = 0, prev_group = 0; - - pblk_gc_free_full_lines(pblk); - - run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD)) - return; - -next_gc_group: - group_list = l_mg->gc_lists[gc_group++]; - - do { - spin_lock(&l_mg->gc_lock); - - line = pblk_gc_get_victim_line(pblk, group_list); - if (!line) { - spin_unlock(&l_mg->gc_lock); - break; - } - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_CLOSED); - line->state = PBLK_LINESTATE_GC; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - spin_unlock(&line->lock); - - list_del(&line->list); - spin_unlock(&l_mg->gc_lock); - - spin_lock(&gc->r_lock); - list_add_tail(&line->list, &gc->r_list); - spin_unlock(&gc->r_lock); - - read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc); - pblk_gc_reader_kick(gc); - - prev_group = 1; - - /* No need to queue up more GC lines than we can handle */ - run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD) - break; - } while (1); - - if (!prev_group && pblk->rl.rb_state > gc_group && - gc_group < PBLK_GC_NR_LISTS) - goto next_gc_group; -} - -static void pblk_gc_timer(struct timer_list *t) -{ - struct pblk *pblk = from_timer(pblk, t, gc.gc_timer); - - pblk_gc_kick(pblk); -} - -static int pblk_gc_ts(void *data) -{ - struct pblk *pblk = data; - - while (!kthread_should_stop()) { - pblk_gc_run(pblk); - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - - return 0; -} - -static int pblk_gc_writer_ts(void *data) -{ - struct pblk *pblk = data; - - while (!kthread_should_stop()) { - if (!pblk_gc_write(pblk)) - continue; - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - - return 0; -} - -static int pblk_gc_reader_ts(void *data) -{ - struct pblk *pblk = data; - struct pblk_gc *gc = &pblk->gc; - - while (!kthread_should_stop()) { - if (!pblk_gc_read(pblk)) - continue; - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_info(pblk, "flushing gc pipeline, %d lines left\n", - atomic_read(&gc->pipeline_gc)); -#endif - - do { - if (!atomic_read(&gc->pipeline_gc)) - break; - - schedule(); - } while (1); - - return 0; -} - -static void pblk_gc_start(struct pblk *pblk) -{ - pblk->gc.gc_active = 1; - pblk_debug(pblk, "gc start\n"); -} - -void pblk_gc_should_start(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - if (gc->gc_enabled && !gc->gc_active) { - pblk_gc_start(pblk); - pblk_gc_kick(pblk); - } -} - -void pblk_gc_should_stop(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - if (gc->gc_active && !gc->gc_forced) - gc->gc_active = 0; -} - -void pblk_gc_should_kick(struct pblk *pblk) -{ - pblk_rl_update_rates(&pblk->rl); -} - -void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, - int *gc_active) -{ - struct pblk_gc *gc = &pblk->gc; - - spin_lock(&gc->lock); - *gc_enabled = gc->gc_enabled; - *gc_active = gc->gc_active; - spin_unlock(&gc->lock); -} - -int pblk_gc_sysfs_force(struct pblk *pblk, int force) -{ - struct pblk_gc *gc = 
&pblk->gc; - - if (force < 0 || force > 1) - return -EINVAL; - - spin_lock(&gc->lock); - gc->gc_forced = force; - - if (force) - gc->gc_enabled = 1; - else - gc->gc_enabled = 0; - spin_unlock(&gc->lock); - - pblk_gc_should_start(pblk); - - return 0; -} - -int pblk_gc_init(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - int ret; - - gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts"); - if (IS_ERR(gc->gc_ts)) { - pblk_err(pblk, "could not allocate GC main kthread\n"); - return PTR_ERR(gc->gc_ts); - } - - gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk, - "pblk-gc-writer-ts"); - if (IS_ERR(gc->gc_writer_ts)) { - pblk_err(pblk, "could not allocate GC writer kthread\n"); - ret = PTR_ERR(gc->gc_writer_ts); - goto fail_free_main_kthread; - } - - gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk, - "pblk-gc-reader-ts"); - if (IS_ERR(gc->gc_reader_ts)) { - pblk_err(pblk, "could not allocate GC reader kthread\n"); - ret = PTR_ERR(gc->gc_reader_ts); - goto fail_free_writer_kthread; - } - - timer_setup(&gc->gc_timer, pblk_gc_timer, 0); - mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); - - gc->gc_active = 0; - gc->gc_forced = 0; - gc->gc_enabled = 1; - gc->w_entries = 0; - atomic_set(&gc->read_inflight_gc, 0); - atomic_set(&gc->pipeline_gc, 0); - - /* Workqueue that reads valid sectors from a line and submit them to the - * GC writer to be recycled. - */ - gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS); - if (!gc->gc_line_reader_wq) { - pblk_err(pblk, "could not allocate GC line reader workqueue\n"); - ret = -ENOMEM; - goto fail_free_reader_kthread; - } - - /* Workqueue that prepare lines for GC */ - gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, 1); - if (!gc->gc_reader_wq) { - pblk_err(pblk, "could not allocate GC reader workqueue\n"); - ret = -ENOMEM; - goto fail_free_reader_line_wq; - } - - spin_lock_init(&gc->lock); - spin_lock_init(&gc->w_lock); - spin_lock_init(&gc->r_lock); - - sema_init(&gc->gc_sem, PBLK_GC_RQ_QD); - - INIT_LIST_HEAD(&gc->w_list); - INIT_LIST_HEAD(&gc->r_list); - - return 0; - -fail_free_reader_line_wq: - destroy_workqueue(gc->gc_line_reader_wq); -fail_free_reader_kthread: - kthread_stop(gc->gc_reader_ts); -fail_free_writer_kthread: - kthread_stop(gc->gc_writer_ts); -fail_free_main_kthread: - kthread_stop(gc->gc_ts); - - return ret; -} - -void pblk_gc_exit(struct pblk *pblk, bool graceful) -{ - struct pblk_gc *gc = &pblk->gc; - - gc->gc_enabled = 0; - del_timer_sync(&gc->gc_timer); - gc->gc_active = 0; - - if (gc->gc_ts) - kthread_stop(gc->gc_ts); - - if (gc->gc_reader_ts) - kthread_stop(gc->gc_reader_ts); - - if (graceful) { - flush_workqueue(gc->gc_reader_wq); - flush_workqueue(gc->gc_line_reader_wq); - } - - destroy_workqueue(gc->gc_reader_wq); - destroy_workqueue(gc->gc_line_reader_wq); - - if (gc->gc_writer_ts) - kthread_stop(gc->gc_writer_ts); -} diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c deleted file mode 100644 index 5924f09c217b..000000000000 --- a/drivers/lightnvm/pblk-init.c +++ /dev/null @@ -1,1324 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License 
version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Implementation of a physical block-device target for Open-channel SSDs. - * - * pblk-init.c - pblk's initialization. - */ - -#include "pblk.h" -#include "pblk-trace.h" - -static unsigned int write_buffer_size; - -module_param(write_buffer_size, uint, 0644); -MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); - -struct pblk_global_caches { - struct kmem_cache *ws; - struct kmem_cache *rec; - struct kmem_cache *g_rq; - struct kmem_cache *w_rq; - - struct kref kref; - - struct mutex mutex; /* Ensures consistency between - * caches and kref - */ -}; - -static struct pblk_global_caches pblk_caches = { - .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex), - .kref = KREF_INIT(0), -}; - -struct bio_set pblk_bio_set; - -static blk_qc_t pblk_submit_bio(struct bio *bio) -{ - struct pblk *pblk = bio->bi_bdev->bd_disk->queue->queuedata; - - if (bio_op(bio) == REQ_OP_DISCARD) { - pblk_discard(pblk, bio); - if (!(bio->bi_opf & REQ_PREFLUSH)) { - bio_endio(bio); - return BLK_QC_T_NONE; - } - } - - /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap - * constraint. Writes can be of arbitrary size. - */ - if (bio_data_dir(bio) == READ) { - blk_queue_split(&bio); - pblk_submit_read(pblk, bio); - } else { - /* Prevent deadlock in the case of a modest LUN configuration - * and large user I/Os. Unless stalled, the rate limiter - * leaves at least 256KB available for user I/O. - */ - if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) - blk_queue_split(&bio); - - pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); - } - - return BLK_QC_T_NONE; -} - -static const struct block_device_operations pblk_bops = { - .owner = THIS_MODULE, - .submit_bio = pblk_submit_bio, -}; - - -static size_t pblk_trans_map_size(struct pblk *pblk) -{ - int entry_size = 8; - - if (pblk->addrf_len < 32) - entry_size = 4; - - return entry_size * pblk->capacity; -} - -#ifdef CONFIG_NVM_PBLK_DEBUG -static u32 pblk_l2p_crc(struct pblk *pblk) -{ - size_t map_size; - u32 crc = ~(u32)0; - - map_size = pblk_trans_map_size(pblk); - crc = crc32_le(crc, pblk->trans_map, map_size); - return crc; -} -#endif - -static void pblk_l2p_free(struct pblk *pblk) -{ - vfree(pblk->trans_map); -} - -static int pblk_l2p_recover(struct pblk *pblk, bool factory_init) -{ - struct pblk_line *line = NULL; - - if (factory_init) { - guid_gen(&pblk->instance_uuid); - } else { - line = pblk_recov_l2p(pblk); - if (IS_ERR(line)) { - pblk_err(pblk, "could not recover l2p table\n"); - return -EFAULT; - } - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_info(pblk, "init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); -#endif - - /* Free full lines directly as GC has not been started yet */ - pblk_gc_free_full_lines(pblk); - - if (!line) { - /* Configure next line for user data */ - line = pblk_line_get_first_data(pblk); - if (!line) - return -EFAULT; - } - - return 0; -} - -static int pblk_l2p_init(struct pblk *pblk, bool factory_init) -{ - sector_t i; - struct ppa_addr ppa; - size_t map_size; - int ret = 0; - - map_size = pblk_trans_map_size(pblk); - pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN | - __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM); - if (!pblk->trans_map) { - pblk_err(pblk, "failed to allocate L2P (need %zu of 
memory)\n", - map_size); - return -ENOMEM; - } - - pblk_ppa_set_empty(&ppa); - - for (i = 0; i < pblk->capacity; i++) - pblk_trans_map_set(pblk, i, ppa); - - ret = pblk_l2p_recover(pblk, factory_init); - if (ret) - vfree(pblk->trans_map); - - return ret; -} - -static void pblk_rwb_free(struct pblk *pblk) -{ - if (pblk_rb_tear_down_check(&pblk->rwb)) - pblk_err(pblk, "write buffer error on tear down\n"); - - pblk_rb_free(&pblk->rwb); -} - -static int pblk_rwb_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - unsigned long buffer_size; - int pgs_in_buffer, threshold; - - threshold = geo->mw_cunits * geo->all_luns; - pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) - * geo->all_luns; - - if (write_buffer_size && (write_buffer_size > pgs_in_buffer)) - buffer_size = write_buffer_size; - else - buffer_size = pgs_in_buffer; - - return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); -} - -static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, - struct nvm_addrf_12 *dst) -{ - struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf; - int power_len; - - /* Re-calculate channel and lun format to adapt to configuration */ - power_len = get_count_order(geo->num_ch); - if (1 << power_len != geo->num_ch) { - pblk_err(pblk, "supports only power-of-two channel config.\n"); - return -EINVAL; - } - dst->ch_len = power_len; - - power_len = get_count_order(geo->num_lun); - if (1 << power_len != geo->num_lun) { - pblk_err(pblk, "supports only power-of-two LUN config.\n"); - return -EINVAL; - } - dst->lun_len = power_len; - - dst->blk_len = src->blk_len; - dst->pg_len = src->pg_len; - dst->pln_len = src->pln_len; - dst->sec_len = src->sec_len; - - dst->sec_offset = 0; - dst->pln_offset = dst->sec_len; - dst->ch_offset = dst->pln_offset + dst->pln_len; - dst->lun_offset = dst->ch_offset + dst->ch_len; - dst->pg_offset = dst->lun_offset + dst->lun_len; - dst->blk_offset = dst->pg_offset + dst->pg_len; - - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; - dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; - dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; - - return dst->blk_offset + src->blk_len; -} - -static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst, - struct pblk_addrf *udst) -{ - struct nvm_addrf *src = &geo->addrf; - - adst->ch_len = get_count_order(geo->num_ch); - adst->lun_len = get_count_order(geo->num_lun); - adst->chk_len = src->chk_len; - adst->sec_len = src->sec_len; - - adst->sec_offset = 0; - adst->ch_offset = adst->sec_len; - adst->lun_offset = adst->ch_offset + adst->ch_len; - adst->chk_offset = adst->lun_offset + adst->lun_len; - - adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset; - adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset; - adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset; - adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset; - - udst->sec_stripe = geo->ws_opt; - udst->ch_stripe = geo->num_ch; - udst->lun_stripe = geo->num_lun; - - udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe; - udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe; - - return adst->chk_offset + adst->chk_len; -} - -static int pblk_set_addrf(struct pblk *pblk) -{ - struct 
nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int mod; - - switch (geo->version) { - case NVM_OCSSD_SPEC_12: - div_u64_rem(geo->clba, pblk->min_write_pgs, &mod); - if (mod) { - pblk_err(pblk, "bad configuration of sectors/pages\n"); - return -EINVAL; - } - - pblk->addrf_len = pblk_set_addrf_12(pblk, geo, - (void *)&pblk->addrf); - break; - case NVM_OCSSD_SPEC_20: - pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf, - &pblk->uaddrf); - break; - default: - pblk_err(pblk, "OCSSD revision not supported (%d)\n", - geo->version); - return -EINVAL; - } - - return 0; -} - -static int pblk_create_global_caches(void) -{ - - pblk_caches.ws = kmem_cache_create("pblk_blk_ws", - sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_caches.ws) - return -ENOMEM; - - pblk_caches.rec = kmem_cache_create("pblk_rec", - sizeof(struct pblk_rec_ctx), 0, 0, NULL); - if (!pblk_caches.rec) - goto fail_destroy_ws; - - pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, - 0, 0, NULL); - if (!pblk_caches.g_rq) - goto fail_destroy_rec; - - pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, - 0, 0, NULL); - if (!pblk_caches.w_rq) - goto fail_destroy_g_rq; - - return 0; - -fail_destroy_g_rq: - kmem_cache_destroy(pblk_caches.g_rq); -fail_destroy_rec: - kmem_cache_destroy(pblk_caches.rec); -fail_destroy_ws: - kmem_cache_destroy(pblk_caches.ws); - - return -ENOMEM; -} - -static int pblk_get_global_caches(void) -{ - int ret = 0; - - mutex_lock(&pblk_caches.mutex); - - if (kref_get_unless_zero(&pblk_caches.kref)) - goto out; - - ret = pblk_create_global_caches(); - if (!ret) - kref_init(&pblk_caches.kref); - -out: - mutex_unlock(&pblk_caches.mutex); - return ret; -} - -static void pblk_destroy_global_caches(struct kref *ref) -{ - struct pblk_global_caches *c; - - c = container_of(ref, struct pblk_global_caches, kref); - - kmem_cache_destroy(c->ws); - kmem_cache_destroy(c->rec); - kmem_cache_destroy(c->g_rq); - kmem_cache_destroy(c->w_rq); -} - -static void pblk_put_global_caches(void) -{ - mutex_lock(&pblk_caches.mutex); - kref_put(&pblk_caches.kref, pblk_destroy_global_caches); - mutex_unlock(&pblk_caches.mutex); -} - -static int pblk_core_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int ret, max_write_ppas; - - atomic64_set(&pblk->user_wa, 0); - atomic64_set(&pblk->pad_wa, 0); - atomic64_set(&pblk->gc_wa, 0); - pblk->user_rst_wa = 0; - pblk->pad_rst_wa = 0; - pblk->gc_rst_wa = 0; - - atomic64_set(&pblk->nr_flush, 0); - pblk->nr_flush_rst = 0; - - pblk->min_write_pgs = geo->ws_opt; - pblk->min_write_pgs_data = pblk->min_write_pgs; - max_write_ppas = pblk->min_write_pgs * geo->all_luns; - pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); - pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, - queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); - pblk_set_sec_per_write(pblk, pblk->min_write_pgs); - - pblk->oob_meta_size = geo->sos; - if (!pblk_is_oob_meta_supported(pblk)) { - /* For drives which does not have OOB metadata feature - * in order to support recovery feature we need to use - * so called packed metadata. Packed metada will store - * the same information as OOB metadata (l2p table mapping, - * but in the form of the single page at the end of - * every write request. - */ - if (pblk->min_write_pgs - * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { - /* We want to keep all the packed metadata on single - * page per write requests. So we need to ensure that - * it will fit. 
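 * Illustrative numbers, assuming a 16-byte struct pblk_sec_meta:
 * ws_opt = 8 sectors needs 8 * 16 = 128 bytes of packed metadata,
 * which fits easily; on a 4 KB page the check only fires once
 * min_write_pgs exceeds 4096 / 16 = 256 sectors.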
- * - * This is more like sanity check, since there is - * no device with such a big minimal write size - * (above 1 metabytes). - */ - pblk_err(pblk, "Not supported min write size\n"); - return -EINVAL; - } - /* For packed meta approach we do some simplification. - * On read path we always issue requests which size - * equal to max_write_pgs, with all pages filled with - * user payload except of last one page which will be - * filled with packed metadata. - */ - pblk->max_write_pgs = pblk->min_write_pgs; - pblk->min_write_pgs_data = pblk->min_write_pgs - 1; - } - - pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), - GFP_KERNEL); - if (!pblk->pad_dist) - return -ENOMEM; - - if (pblk_get_global_caches()) - goto fail_free_pad_dist; - - /* Internal bios can be at most the sectors signaled by the device. */ - ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0); - if (ret) - goto free_global_caches; - - ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, - pblk_caches.ws); - if (ret) - goto free_page_bio_pool; - - ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, - pblk_caches.rec); - if (ret) - goto free_gen_ws_pool; - - ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, - pblk_caches.g_rq); - if (ret) - goto free_rec_pool; - - ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, - pblk_caches.g_rq); - if (ret) - goto free_r_rq_pool; - - ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, - pblk_caches.w_rq); - if (ret) - goto free_e_rq_pool; - - pblk->close_wq = alloc_workqueue("pblk-close-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS); - if (!pblk->close_wq) - goto free_w_rq_pool; - - pblk->bb_wq = alloc_workqueue("pblk-bb-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (!pblk->bb_wq) - goto free_close_wq; - - pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq", - WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (!pblk->r_end_wq) - goto free_bb_wq; - - if (pblk_set_addrf(pblk)) - goto free_r_end_wq; - - INIT_LIST_HEAD(&pblk->compl_list); - INIT_LIST_HEAD(&pblk->resubmit_list); - - return 0; - -free_r_end_wq: - destroy_workqueue(pblk->r_end_wq); -free_bb_wq: - destroy_workqueue(pblk->bb_wq); -free_close_wq: - destroy_workqueue(pblk->close_wq); -free_w_rq_pool: - mempool_exit(&pblk->w_rq_pool); -free_e_rq_pool: - mempool_exit(&pblk->e_rq_pool); -free_r_rq_pool: - mempool_exit(&pblk->r_rq_pool); -free_rec_pool: - mempool_exit(&pblk->rec_pool); -free_gen_ws_pool: - mempool_exit(&pblk->gen_ws_pool); -free_page_bio_pool: - mempool_exit(&pblk->page_bio_pool); -free_global_caches: - pblk_put_global_caches(); -fail_free_pad_dist: - kfree(pblk->pad_dist); - return -ENOMEM; -} - -static void pblk_core_free(struct pblk *pblk) -{ - if (pblk->close_wq) - destroy_workqueue(pblk->close_wq); - - if (pblk->r_end_wq) - destroy_workqueue(pblk->r_end_wq); - - if (pblk->bb_wq) - destroy_workqueue(pblk->bb_wq); - - mempool_exit(&pblk->page_bio_pool); - mempool_exit(&pblk->gen_ws_pool); - mempool_exit(&pblk->rec_pool); - mempool_exit(&pblk->r_rq_pool); - mempool_exit(&pblk->e_rq_pool); - mempool_exit(&pblk->w_rq_pool); - - pblk_put_global_caches(); - kfree(pblk->pad_dist); -} - -static void pblk_line_mg_free(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int i; - - kfree(l_mg->bb_template); - kfree(l_mg->bb_aux); - kfree(l_mg->vsc_list); - - for (i = 0; i < PBLK_DATA_LINES; i++) { - kfree(l_mg->sline_meta[i]); - kvfree(l_mg->eline_meta[i]->buf); - kfree(l_mg->eline_meta[i]); - } - - 
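	/* bitmap_pool is built on top of bitmap_cache, so the pool must
	 * be destroyed before the cache backing it. A minimal sketch of
	 * the pattern (generic names, not the pblk API):
	 *
	 *	cache = kmem_cache_create("name", size, 0, 0, NULL);
	 *	pool  = mempool_create_slab_pool(min_nr, cache);
	 *	...
	 *	mempool_destroy(pool);		/* elements go back to the cache */
	 *	kmem_cache_destroy(cache);	/* safe: nothing outstanding */
	 */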
mempool_destroy(l_mg->bitmap_pool); - kmem_cache_destroy(l_mg->bitmap_cache); -} - -static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, - struct pblk_line *line) -{ - struct pblk_w_err_gc *w_err_gc = line->w_err_gc; - - kfree(line->blk_bitmap); - kfree(line->erase_bitmap); - kfree(line->chks); - - kvfree(w_err_gc->lba_list); - kfree(w_err_gc); -} - -static void pblk_lines_free(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - int i; - - for (i = 0; i < l_mg->nr_lines; i++) { - line = &pblk->lines[i]; - - pblk_line_free(line); - pblk_line_meta_free(l_mg, line); - } - - pblk_line_mg_free(pblk); - - kfree(pblk->luns); - kfree(pblk->lines); -} - -static int pblk_luns_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int i; - - /* TODO: Implement unbalanced LUN support */ - if (geo->num_lun < 0) { - pblk_err(pblk, "unbalanced LUN config.\n"); - return -EINVAL; - } - - pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun), - GFP_KERNEL); - if (!pblk->luns) - return -ENOMEM; - - for (i = 0; i < geo->all_luns; i++) { - /* Stripe across channels */ - int ch = i % geo->num_ch; - int lun_raw = i / geo->num_ch; - int lunid = lun_raw + ch * geo->num_lun; - - rlun = &pblk->luns[i]; - rlun->bppa = dev->luns[lunid]; - - sema_init(&rlun->wr_sem, 1); - } - - return 0; -} - -/* See comment over struct line_emeta definition */ -static unsigned int calc_emeta_len(struct pblk *pblk) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - - /* Round to sector size so that lba_list starts on its own sector */ - lm->emeta_sec[1] = DIV_ROUND_UP( - sizeof(struct line_emeta) + lm->blk_bitmap_len + - sizeof(struct wa_counters), geo->csecs); - lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs; - - /* Round to sector size so that vsc_list starts on its own sector */ - lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0]; - lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64), - geo->csecs); - lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs; - - lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32), - geo->csecs); - lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs; - - lm->vsc_list_len = l_mg->nr_lines * sizeof(u32); - - return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); -} - -static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_geo *geo = &dev->geo; - sector_t provisioned; - int sec_meta, blk_meta, clba; - int minimum; - - if (geo->op == NVM_TARGET_DEFAULT_OP) - pblk->op = PBLK_DEFAULT_OP; - else - pblk->op = geo->op; - - minimum = pblk_get_min_chks(pblk); - provisioned = nr_free_chks; - provisioned *= (100 - pblk->op); - sector_div(provisioned, 100); - - if ((nr_free_chks - provisioned) < minimum) { - if (geo->op != NVM_TARGET_DEFAULT_OP) { - pblk_err(pblk, "OP too small to create a sane instance\n"); - return -EINTR; - } - - /* If the user did not specify an OP value, and PBLK_DEFAULT_OP - * is not enough, calculate and set sane value - */ - - provisioned = nr_free_chks - minimum; - pblk->op = (100 * minimum) / nr_free_chks; - pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", - pblk->op); - } - - pblk->op_blks = nr_free_chks - provisioned; - - /* Internally pblk manages all 
free blocks, but all calculations based - * on user capacity consider only provisioned blocks - */ - pblk->rl.total_blocks = nr_free_chks; - - /* Consider sectors used for metadata */ - sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; - blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - - clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; - pblk->capacity = (provisioned - blk_meta) * clba; - - atomic_set(&pblk->rl.free_blocks, nr_free_chks); - atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); - - return 0; -} - -static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, - struct nvm_chk_meta *meta) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - int i, nr_bad_chks = 0; - - for (i = 0; i < lm->blk_per_line; i++) { - struct pblk_lun *rlun = &pblk->luns[i]; - struct nvm_chk_meta *chunk; - struct nvm_chk_meta *chunk_meta; - struct ppa_addr ppa; - int pos; - - ppa = rlun->bppa; - pos = pblk_ppa_to_pos(geo, ppa); - chunk = &line->chks[pos]; - - ppa.m.chk = line->id; - chunk_meta = pblk_chunk_get_off(pblk, meta, ppa); - - chunk->state = chunk_meta->state; - chunk->type = chunk_meta->type; - chunk->wi = chunk_meta->wi; - chunk->slba = chunk_meta->slba; - chunk->cnlb = chunk_meta->cnlb; - chunk->wp = chunk_meta->wp; - - trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa, - chunk->state); - - if (chunk->type & NVM_CHK_TP_SZ_SPEC) { - WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); - continue; - } - - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) - continue; - - set_bit(pos, line->blk_bitmap); - nr_bad_chks++; - } - - return nr_bad_chks; -} - -static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, - void *chunk_meta, int line_id) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - long nr_bad_chks, chk_in_line; - - line->pblk = pblk; - line->id = line_id; - line->type = PBLK_LINETYPE_FREE; - line->state = PBLK_LINESTATE_NEW; - line->gc_group = PBLK_LINEGC_NONE; - line->vsc = &l_mg->vsc_list[line_id]; - spin_lock_init(&line->lock); - - nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); - - chk_in_line = lm->blk_per_line - nr_bad_chks; - if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || - chk_in_line < lm->min_blk_line) { - line->state = PBLK_LINESTATE_BAD; - list_add_tail(&line->list, &l_mg->bad_list); - return 0; - } - - atomic_set(&line->blk_in_line, chk_in_line); - list_add_tail(&line->list, &l_mg->free_list); - l_mg->nr_free_lines++; - - return chk_in_line; -} - -static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - - line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->blk_bitmap) - return -ENOMEM; - - line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->erase_bitmap) - goto free_blk_bitmap; - - - line->chks = kmalloc_array(lm->blk_per_line, - sizeof(struct nvm_chk_meta), GFP_KERNEL); - if (!line->chks) - goto free_erase_bitmap; - - line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL); - if (!line->w_err_gc) - goto free_chks; - - return 0; - -free_chks: - kfree(line->chks); -free_erase_bitmap: - kfree(line->erase_bitmap); -free_blk_bitmap: - kfree(line->blk_bitmap); - return -ENOMEM; -} - -static int pblk_line_mg_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct 
pblk_line_meta *lm = &pblk->lm; - int i, bb_distance; - - l_mg->nr_lines = geo->num_chk; - l_mg->log_line = l_mg->data_line = NULL; - l_mg->l_seq_nr = l_mg->d_seq_nr = 0; - l_mg->nr_free_lines = 0; - bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); - - INIT_LIST_HEAD(&l_mg->free_list); - INIT_LIST_HEAD(&l_mg->corrupt_list); - INIT_LIST_HEAD(&l_mg->bad_list); - INIT_LIST_HEAD(&l_mg->gc_full_list); - INIT_LIST_HEAD(&l_mg->gc_high_list); - INIT_LIST_HEAD(&l_mg->gc_mid_list); - INIT_LIST_HEAD(&l_mg->gc_low_list); - INIT_LIST_HEAD(&l_mg->gc_empty_list); - INIT_LIST_HEAD(&l_mg->gc_werr_list); - - INIT_LIST_HEAD(&l_mg->emeta_list); - - l_mg->gc_lists[0] = &l_mg->gc_werr_list; - l_mg->gc_lists[1] = &l_mg->gc_high_list; - l_mg->gc_lists[2] = &l_mg->gc_mid_list; - l_mg->gc_lists[3] = &l_mg->gc_low_list; - - spin_lock_init(&l_mg->free_lock); - spin_lock_init(&l_mg->close_lock); - spin_lock_init(&l_mg->gc_lock); - - l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL); - if (!l_mg->vsc_list) - goto fail; - - l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!l_mg->bb_template) - goto fail_free_vsc_list; - - l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!l_mg->bb_aux) - goto fail_free_bb_template; - - /* smeta is always small enough to fit on a kmalloc memory allocation, - * emeta depends on the number of LUNs allocated to the pblk instance - */ - for (i = 0; i < PBLK_DATA_LINES; i++) { - l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL); - if (!l_mg->sline_meta[i]) - goto fail_free_smeta; - } - - l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap", - lm->sec_bitmap_len, 0, 0, NULL); - if (!l_mg->bitmap_cache) - goto fail_free_smeta; - - /* the bitmap pool is used for both valid and map bitmaps */ - l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2, - l_mg->bitmap_cache); - if (!l_mg->bitmap_pool) - goto fail_destroy_bitmap_cache; - - /* emeta allocates three different buffers for managing metadata with - * in-memory and in-media layouts - */ - for (i = 0; i < PBLK_DATA_LINES; i++) { - struct pblk_emeta *emeta; - - emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL); - if (!emeta) - goto fail_free_emeta; - - emeta->buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL); - if (!emeta->buf) { - kfree(emeta); - goto fail_free_emeta; - } - - emeta->nr_entries = lm->emeta_sec[0]; - l_mg->eline_meta[i] = emeta; - } - - for (i = 0; i < l_mg->nr_lines; i++) - l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY); - - bb_distance = (geo->all_luns) * geo->ws_opt; - for (i = 0; i < lm->sec_per_line; i += bb_distance) - bitmap_set(l_mg->bb_template, i, geo->ws_opt); - - return 0; - -fail_free_emeta: - while (--i >= 0) { - kvfree(l_mg->eline_meta[i]->buf); - kfree(l_mg->eline_meta[i]); - } - - mempool_destroy(l_mg->bitmap_pool); -fail_destroy_bitmap_cache: - kmem_cache_destroy(l_mg->bitmap_cache); -fail_free_smeta: - for (i = 0; i < PBLK_DATA_LINES; i++) - kfree(l_mg->sline_meta[i]); - kfree(l_mg->bb_aux); -fail_free_bb_template: - kfree(l_mg->bb_template); -fail_free_vsc_list: - kfree(l_mg->vsc_list); -fail: - return -ENOMEM; -} - -static int pblk_line_meta_init(struct pblk *pblk) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - unsigned int smeta_len, emeta_len; - int i; - - lm->sec_per_line = geo->clba * geo->all_luns; - lm->blk_per_line = geo->all_luns; - lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); - lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * 
sizeof(long); - lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); - lm->mid_thrs = lm->sec_per_line / 2; - lm->high_thrs = lm->sec_per_line / 4; - lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs; - - /* Calculate necessary pages for smeta. See comment over struct - * line_smeta definition - */ - i = 1; -add_smeta_page: - lm->smeta_sec = i * geo->ws_opt; - lm->smeta_len = lm->smeta_sec * geo->csecs; - - smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len; - if (smeta_len > lm->smeta_len) { - i++; - goto add_smeta_page; - } - - /* Calculate necessary pages for emeta. See comment over struct - * line_emeta definition - */ - i = 1; -add_emeta_page: - lm->emeta_sec[0] = i * geo->ws_opt; - lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs; - - emeta_len = calc_emeta_len(pblk); - if (emeta_len > lm->emeta_len[0]) { - i++; - goto add_emeta_page; - } - - lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0; - - lm->min_blk_line = 1; - if (geo->all_luns > 1) - lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + - lm->emeta_sec[0], geo->clba); - - if (lm->min_blk_line > lm->blk_per_line) { - pblk_err(pblk, "config. not supported. Min. LUN in line:%d\n", - lm->blk_per_line); - return -EINVAL; - } - - return 0; -} - -static int pblk_lines_init(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - void *chunk_meta; - int nr_free_chks = 0; - int i, ret; - - ret = pblk_line_meta_init(pblk); - if (ret) - return ret; - - ret = pblk_line_mg_init(pblk); - if (ret) - return ret; - - ret = pblk_luns_init(pblk); - if (ret) - goto fail_free_meta; - - chunk_meta = pblk_get_chunk_meta(pblk); - if (IS_ERR(chunk_meta)) { - ret = PTR_ERR(chunk_meta); - goto fail_free_luns; - } - - pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), - GFP_KERNEL); - if (!pblk->lines) { - ret = -ENOMEM; - goto fail_free_chunk_meta; - } - - for (i = 0; i < l_mg->nr_lines; i++) { - line = &pblk->lines[i]; - - ret = pblk_alloc_line_meta(pblk, line); - if (ret) - goto fail_free_lines; - - nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); - - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - } - - if (!nr_free_chks) { - pblk_err(pblk, "too many bad blocks prevent for sane instance\n"); - ret = -EINTR; - goto fail_free_lines; - } - - ret = pblk_set_provision(pblk, nr_free_chks); - if (ret) - goto fail_free_lines; - - vfree(chunk_meta); - return 0; - -fail_free_lines: - while (--i >= 0) - pblk_line_meta_free(l_mg, &pblk->lines[i]); - kfree(pblk->lines); -fail_free_chunk_meta: - vfree(chunk_meta); -fail_free_luns: - kfree(pblk->luns); -fail_free_meta: - pblk_line_mg_free(pblk); - - return ret; -} - -static int pblk_writer_init(struct pblk *pblk) -{ - pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); - if (IS_ERR(pblk->writer_ts)) { - int err = PTR_ERR(pblk->writer_ts); - - if (err != -EINTR) - pblk_err(pblk, "could not allocate writer kthread (%d)\n", - err); - return err; - } - - timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0); - mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); - - return 0; -} - -static void pblk_writer_stop(struct pblk *pblk) -{ - /* The pipeline must be stopped and the write buffer emptied before the - * write thread is stopped - */ - WARN(pblk_rb_read_count(&pblk->rwb), - "Stopping not fully persisted write buffer\n"); - - WARN(pblk_rb_sync_count(&pblk->rwb), - "Stopping not fully synced write buffer\n"); - - del_timer_sync(&pblk->wtimer); - if (pblk->writer_ts) - 
kthread_stop(pblk->writer_ts); -} - -static void pblk_free(struct pblk *pblk) -{ - pblk_lines_free(pblk); - pblk_l2p_free(pblk); - pblk_rwb_free(pblk); - pblk_core_free(pblk); - - kfree(pblk); -} - -static void pblk_tear_down(struct pblk *pblk, bool graceful) -{ - if (graceful) - __pblk_pipeline_flush(pblk); - __pblk_pipeline_stop(pblk); - pblk_writer_stop(pblk); - pblk_rb_sync_l2p(&pblk->rwb); - pblk_rl_free(&pblk->rl); - - pblk_debug(pblk, "consistent tear down (graceful:%d)\n", graceful); -} - -static void pblk_exit(void *private, bool graceful) -{ - struct pblk *pblk = private; - - pblk_gc_exit(pblk, graceful); - pblk_tear_down(pblk, graceful); - -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_info(pblk, "exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); -#endif - - pblk_free(pblk); -} - -static sector_t pblk_capacity(void *private) -{ - struct pblk *pblk = private; - - return pblk->capacity * NR_PHY_IN_LOG; -} - -static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, - int flags) -{ - struct nvm_geo *geo = &dev->geo; - struct request_queue *bqueue = dev->q; - struct request_queue *tqueue = tdisk->queue; - struct pblk *pblk; - int ret; - - pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL); - if (!pblk) - return ERR_PTR(-ENOMEM); - - pblk->dev = dev; - pblk->disk = tdisk; - pblk->state = PBLK_STATE_RUNNING; - trace_pblk_state(pblk_disk_name(pblk), pblk->state); - pblk->gc.gc_enabled = 0; - - if (!(geo->version == NVM_OCSSD_SPEC_12 || - geo->version == NVM_OCSSD_SPEC_20)) { - pblk_err(pblk, "OCSSD version not supported (%u)\n", - geo->version); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - - if (geo->ext) { - pblk_err(pblk, "extended metadata not supported\n"); - kfree(pblk); - return ERR_PTR(-EINVAL); - } - - spin_lock_init(&pblk->resubmit_lock); - spin_lock_init(&pblk->trans_lock); - spin_lock_init(&pblk->lock); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_set(&pblk->inflight_writes, 0); - atomic_long_set(&pblk->padded_writes, 0); - atomic_long_set(&pblk->padded_wb, 0); - atomic_long_set(&pblk->req_writes, 0); - atomic_long_set(&pblk->sub_writes, 0); - atomic_long_set(&pblk->sync_writes, 0); - atomic_long_set(&pblk->inflight_reads, 0); - atomic_long_set(&pblk->cache_reads, 0); - atomic_long_set(&pblk->sync_reads, 0); - atomic_long_set(&pblk->recov_writes, 0); - atomic_long_set(&pblk->recov_writes, 0); - atomic_long_set(&pblk->recov_gc_writes, 0); - atomic_long_set(&pblk->recov_gc_reads, 0); -#endif - - atomic_long_set(&pblk->read_failed, 0); - atomic_long_set(&pblk->read_empty, 0); - atomic_long_set(&pblk->read_high_ecc, 0); - atomic_long_set(&pblk->read_failed_gc, 0); - atomic_long_set(&pblk->write_failed, 0); - atomic_long_set(&pblk->erase_failed, 0); - - ret = pblk_core_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize core\n"); - goto fail; - } - - ret = pblk_lines_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize lines\n"); - goto fail_free_core; - } - - ret = pblk_rwb_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize write buffer\n"); - goto fail_free_lines; - } - - ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY); - if (ret) { - pblk_err(pblk, "could not initialize maps\n"); - goto fail_free_rwb; - } - - ret = pblk_writer_init(pblk); - if (ret) { - if (ret != -EINTR) - pblk_err(pblk, "could not initialize write thread\n"); - goto fail_free_l2p; - } - - ret = pblk_gc_init(pblk); - if (ret) { - pblk_err(pblk, "could not initialize gc\n"); - goto fail_stop_writer; - } - - /* inherit the size from the underlying device */ - 
blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); - blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); - - blk_queue_write_cache(tqueue, true, false); - - tqueue->limits.discard_granularity = geo->clba * geo->csecs; - tqueue->limits.discard_alignment = 0; - blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue); - - pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n", - geo->all_luns, pblk->l_mg.nr_lines, - (unsigned long long)pblk->capacity, - pblk->rwb.nr_entries); - - wake_up_process(pblk->writer_ts); - - /* Check if we need to start GC */ - pblk_gc_should_kick(pblk); - - return pblk; - -fail_stop_writer: - pblk_writer_stop(pblk); -fail_free_l2p: - pblk_l2p_free(pblk); -fail_free_rwb: - pblk_rwb_free(pblk); -fail_free_lines: - pblk_lines_free(pblk); -fail_free_core: - pblk_core_free(pblk); -fail: - kfree(pblk); - return ERR_PTR(ret); -} - -/* physical block device target */ -static struct nvm_tgt_type tt_pblk = { - .name = "pblk", - .version = {1, 0, 0}, - - .bops = &pblk_bops, - .capacity = pblk_capacity, - - .init = pblk_init, - .exit = pblk_exit, - - .sysfs_init = pblk_sysfs_init, - .sysfs_exit = pblk_sysfs_exit, - .owner = THIS_MODULE, -}; - -static int __init pblk_module_init(void) -{ - int ret; - - ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0); - if (ret) - return ret; - ret = nvm_register_tgt_type(&tt_pblk); - if (ret) - bioset_exit(&pblk_bio_set); - return ret; -} - -static void pblk_module_exit(void) -{ - bioset_exit(&pblk_bio_set); - nvm_unregister_tgt_type(&tt_pblk); -} - -module_init(pblk_module_init); -module_exit(pblk_module_exit); -MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>"); -MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>"); -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs"); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c deleted file mode 100644 index 5408e32b2f13..000000000000 --- a/drivers/lightnvm/pblk-map.c +++ /dev/null @@ -1,210 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * pblk-map.c - pblk's lba-ppa mapping strategy - * - */ - -#include "pblk.h" - -static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, - struct ppa_addr *ppa_list, - unsigned long *lun_bitmap, - void *meta_list, - unsigned int valid_secs) -{ - struct pblk_line *line = pblk_line_get_data(pblk); - struct pblk_emeta *emeta; - struct pblk_w_ctx *w_ctx; - __le64 *lba_list; - u64 paddr; - int nr_secs = pblk->min_write_pgs; - int i; - - if (!line) - return -ENOSPC; - - if (pblk_line_is_full(line)) { - struct pblk_line *prev_line = line; - - /* If we cannot allocate a new line, make sure to store metadata - * on current line and then fail - */ - line = pblk_line_replace_data(pblk); - pblk_line_close_meta(pblk, prev_line); - - if (!line) { - pblk_pipeline_stop(pblk); - return -ENOSPC; - } - - } - - emeta = line->emeta; - lba_list = emeta_to_lbas(pblk, emeta->buf); - - paddr = pblk_alloc_page(pblk, line, nr_secs); - - for (i = 0; i < nr_secs; i++, paddr++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - /* ppa to be sent to the device */ - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - - /* Write context for target bio completion on write buffer. Note - * that the write buffer is protected by the sync backpointer, - * and a single writer thread have access to each specific entry - * at a time. Thus, it is safe to modify the context for the - * entry we are setting up for submission without taking any - * lock or memory barrier. - */ - if (i < valid_secs) { - kref_get(&line->ref); - atomic_inc(&line->sec_to_update); - w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i); - w_ctx->ppa = ppa_list[i]; - meta->lba = cpu_to_le64(w_ctx->lba); - lba_list[paddr] = cpu_to_le64(w_ctx->lba); - if (lba_list[paddr] != addr_empty) - line->nr_valid_lbas++; - else - atomic64_inc(&pblk->pad_wa); - } else { - lba_list[paddr] = addr_empty; - meta->lba = addr_empty; - __pblk_map_invalidate(pblk, line, paddr); - } - } - - pblk_down_rq(pblk, ppa_list[0], lun_bitmap); - return 0; -} - -int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, - unsigned long *lun_bitmap, unsigned int valid_secs, - unsigned int off) -{ - void *meta_list = pblk_get_meta_for_writes(pblk, rqd); - void *meta_buffer; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - unsigned int map_secs; - int min = pblk->min_write_pgs; - int i; - int ret; - - for (i = off; i < rqd->nr_ppas; i += min) { - map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - meta_buffer = pblk_get_meta(pblk, meta_list, i); - - ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, meta_buffer, map_secs); - if (ret) - return ret; - } - - return 0; -} - -/* only if erase_ppa is set, acquire erase semaphore */ -int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int sentry, unsigned long *lun_bitmap, - unsigned int valid_secs, struct ppa_addr *erase_ppa) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - void *meta_list = pblk_get_meta_for_writes(pblk, rqd); - void *meta_buffer; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - struct pblk_line *e_line, *d_line; - unsigned int map_secs; - int min = pblk->min_write_pgs; - int i, erase_lun; - int ret; - - - for (i = 0; i < rqd->nr_ppas; i += min) { - map_secs = (i + min > valid_secs) ? 
(valid_secs % min) : min; - meta_buffer = pblk_get_meta(pblk, meta_list, i); - - ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, meta_buffer, map_secs); - if (ret) - return ret; - - erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); - - /* line can change after page map. We might also be writing the - * last line. - */ - e_line = pblk_line_get_erase(pblk); - if (!e_line) - return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, - valid_secs, i + min); - - spin_lock(&e_line->lock); - if (!test_bit(erase_lun, e_line->erase_bitmap)) { - set_bit(erase_lun, e_line->erase_bitmap); - atomic_dec(&e_line->left_eblks); - - *erase_ppa = ppa_list[i]; - erase_ppa->a.blk = e_line->id; - erase_ppa->a.reserved = 0; - - spin_unlock(&e_line->lock); - - /* Avoid evaluating e_line->left_eblks */ - return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, - valid_secs, i + min); - } - spin_unlock(&e_line->lock); - } - - d_line = pblk_line_get_data(pblk); - - /* line can change after page map. We might also be writing the - * last line. - */ - e_line = pblk_line_get_erase(pblk); - if (!e_line) - return -ENOSPC; - - /* Erase blocks that are bad in this line but might not be in next */ - if (unlikely(pblk_ppa_empty(*erase_ppa)) && - bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) { - int bit = -1; - -retry: - bit = find_next_bit(d_line->blk_bitmap, - lm->blk_per_line, bit + 1); - if (bit >= lm->blk_per_line) - return 0; - - spin_lock(&e_line->lock); - if (test_bit(bit, e_line->erase_bitmap)) { - spin_unlock(&e_line->lock); - goto retry; - } - spin_unlock(&e_line->lock); - - set_bit(bit, e_line->erase_bitmap); - atomic_dec(&e_line->left_eblks); - *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */ - erase_ppa->a.blk = e_line->id; - } - - return 0; -} diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c deleted file mode 100644 index 5abb1705b039..000000000000 --- a/drivers/lightnvm/pblk-rb.c +++ /dev/null @@ -1,858 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * - * Based upon the circular ringbuffer. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * pblk-rb.c - pblk's write buffer - */ - -#include <linux/circ_buf.h> - -#include "pblk.h" - -static DECLARE_RWSEM(pblk_rb_lock); - -static void pblk_rb_data_free(struct pblk_rb *rb) -{ - struct pblk_rb_pages *p, *t; - - down_write(&pblk_rb_lock); - list_for_each_entry_safe(p, t, &rb->pages, list) { - free_pages((unsigned long)page_address(p->pages), p->order); - list_del(&p->list); - kfree(p); - } - up_write(&pblk_rb_lock); -} - -void pblk_rb_free(struct pblk_rb *rb) -{ - pblk_rb_data_free(rb); - vfree(rb->entries); -} - -/* - * pblk_rb_calculate_size -- calculate the size of the write buffer - */ -static unsigned int pblk_rb_calculate_size(unsigned int nr_entries, - unsigned int threshold) -{ - unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA)); - unsigned int max_sz = max(thr_sz, nr_entries); - unsigned int max_io; - - /* Alloc a write buffer that can (i) fit at least two split bios - * (considering max I/O size NVM_MAX_VLBA, and (ii) guarantee that the - * threshold will be respected - */ - max_io = (1 << max((int)(get_count_order(max_sz)), - (int)(get_count_order(NVM_MAX_VLBA << 1)))); - if ((threshold + NVM_MAX_VLBA) >= max_io) - max_io <<= 1; - - return max_io; -} - -/* - * Initialize ring buffer. The data and metadata buffers must be previously - * allocated and their size must be a power of two - * (Documentation/core-api/circular-buffers.rst) - */ -int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, - unsigned int seg_size) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entries; - unsigned int init_entry = 0; - unsigned int max_order = MAX_ORDER - 1; - unsigned int power_size, power_seg_sz; - unsigned int alloc_order, order, iter; - unsigned int nr_entries; - - nr_entries = pblk_rb_calculate_size(size, threshold); - entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); - if (!entries) - return -ENOMEM; - - power_size = get_count_order(nr_entries); - power_seg_sz = get_count_order(seg_size); - - down_write(&pblk_rb_lock); - rb->entries = entries; - rb->seg_size = (1 << power_seg_sz); - rb->nr_entries = (1 << power_size); - rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; - rb->back_thres = threshold; - rb->flush_point = EMPTY_ENTRY; - - spin_lock_init(&rb->w_lock); - spin_lock_init(&rb->s_lock); - - INIT_LIST_HEAD(&rb->pages); - - alloc_order = power_size; - if (alloc_order >= max_order) { - order = max_order; - iter = (1 << (alloc_order - max_order)); - } else { - order = alloc_order; - iter = 1; - } - - do { - struct pblk_rb_entry *entry; - struct pblk_rb_pages *page_set; - void *kaddr; - unsigned long set_size; - int i; - - page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); - if (!page_set) { - up_write(&pblk_rb_lock); - vfree(entries); - return -ENOMEM; - } - - page_set->order = order; - page_set->pages = alloc_pages(GFP_KERNEL, order); - if (!page_set->pages) { - kfree(page_set); - pblk_rb_data_free(rb); - up_write(&pblk_rb_lock); - vfree(entries); - return -ENOMEM; - } - kaddr = page_address(page_set->pages); - - entry = &rb->entries[init_entry]; - entry->data = kaddr; - entry->cacheline = pblk_cacheline_to_addr(init_entry++); - entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; - - set_size = (1 << order); - for (i = 1; i < set_size; i++) { - entry = &rb->entries[init_entry]; - entry->cacheline = pblk_cacheline_to_addr(init_entry++); - entry->data = kaddr + (i * rb->seg_size); - entry->w_ctx.flags = PBLK_WRITABLE_ENTRY; - bio_list_init(&entry->w_ctx.bios); - } 
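	/* Illustrative layout of one iteration, assuming order = 2 and
	 * seg_size == PAGE_SIZE, i.e. a 4-page allocation carved into
	 * four ring entries:
	 *
	 *	kaddr + 0 * seg_size  ->  entries[n + 0].data
	 *	kaddr + 1 * seg_size  ->  entries[n + 1].data
	 *	kaddr + 2 * seg_size  ->  entries[n + 2].data
	 *	kaddr + 3 * seg_size  ->  entries[n + 3].data
	 */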
- - list_add_tail(&page_set->list, &rb->pages); - iter--; - } while (iter > 0); - up_write(&pblk_rb_lock); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_set(&rb->inflight_flush_point, 0); -#endif - - /* - * Initialize rate-limiter, which controls access to the write buffer - * by user and GC I/O - */ - pblk_rl_init(&pblk->rl, rb->nr_entries, threshold); - - return 0; -} - -static void clean_wctx(struct pblk_w_ctx *w_ctx) -{ - int flags; - - flags = READ_ONCE(w_ctx->flags); - WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY), - "pblk: overwriting unsubmitted data\n"); - - /* Release flags on context. Protect from writes and reads */ - smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); - pblk_ppa_set_empty(&w_ctx->ppa); - w_ctx->lba = ADDR_EMPTY; -} - -#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size) -#define pblk_rb_ring_space(rb, head, tail, size) \ - (CIRC_SPACE(head, tail, size)) - -/* - * Buffer space is calculated with respect to the back pointer signaling - * synchronized entries to the media. - */ -static unsigned int pblk_rb_space(struct pblk_rb *rb) -{ - unsigned int mem = READ_ONCE(rb->mem); - unsigned int sync = READ_ONCE(rb->sync); - - return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); -} - -unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, - unsigned int nr_entries) -{ - return (p + nr_entries) & (rb->nr_entries - 1); -} - -/* - * Buffer count is calculated with respect to the submission entry signaling the - * entries that are available to send to the media - */ -unsigned int pblk_rb_read_count(struct pblk_rb *rb) -{ - unsigned int mem = READ_ONCE(rb->mem); - unsigned int subm = READ_ONCE(rb->subm); - - return pblk_rb_ring_count(mem, subm, rb->nr_entries); -} - -unsigned int pblk_rb_sync_count(struct pblk_rb *rb) -{ - unsigned int mem = READ_ONCE(rb->mem); - unsigned int sync = READ_ONCE(rb->sync); - - return pblk_rb_ring_count(mem, sync, rb->nr_entries); -} - -unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) -{ - unsigned int subm; - - subm = READ_ONCE(rb->subm); - /* Commit read means updating submission pointer */ - smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries)); - - return subm; -} - -static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_line *line; - struct pblk_rb_entry *entry; - struct pblk_w_ctx *w_ctx; - unsigned int user_io = 0, gc_io = 0; - unsigned int i; - int flags; - - for (i = 0; i < to_update; i++) { - entry = &rb->entries[rb->l2p_update]; - w_ctx = &entry->w_ctx; - - flags = READ_ONCE(entry->w_ctx.flags); - if (flags & PBLK_IOTYPE_USER) - user_io++; - else if (flags & PBLK_IOTYPE_GC) - gc_io++; - else - WARN(1, "pblk: unknown IO type\n"); - - pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, - entry->cacheline); - - line = pblk_ppa_to_line(pblk, w_ctx->ppa); - atomic_dec(&line->sec_to_update); - kref_put(&line->ref, pblk_line_put); - clean_wctx(w_ctx); - rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1); - } - - pblk_rl_out(&pblk->rl, user_io, gc_io); - - return 0; -} - -/* - * When we move the l2p_update pointer, we update the l2p table - lookups will - * point to the physical address instead of to the cacheline in the write buffer - * from this moment on. 
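The counting macros above follow the kernel's circular-buffer pattern: with a power-of-two size, free-running head/tail indices are reduced with a mask instead of a modulo, and one slot is kept free so a full ring never looks empty. A minimal standalone sketch of the same accounting:

    #include <assert.h>

    /* standalone sketch of CIRC_CNT/CIRC_SPACE-style ring accounting */
    static unsigned int ring_count(unsigned int head, unsigned int tail,
                                   unsigned int size)
    {
            return (head - tail) & (size - 1); /* entries ready to consume */
    }

    static unsigned int ring_space(unsigned int head, unsigned int tail,
                                   unsigned int size)
    {
            /* one slot stays free so full and empty are distinguishable */
            return ring_count(tail, head + 1, size);
    }

    static unsigned int ring_wrap(unsigned int p, unsigned int n,
                                  unsigned int size)
    {
            return (p + n) & (size - 1); /* advance with wrap-around */
    }

    int main(void)
    {
            unsigned int head = 0, tail = 0, size = 8;

            head = ring_wrap(head, 5, size);           /* produce 5 entries */
            assert(ring_count(head, tail, size) == 5);
            assert(ring_space(head, tail, size) == 2); /* 8 - 5 - 1 */
            tail = ring_wrap(tail, 3, size);           /* consume 3 */
            assert(ring_count(head, tail, size) == 2);
            return 0;
    }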
- */ -static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int mem, unsigned int sync) -{ - unsigned int space, count; - int ret = 0; - - lockdep_assert_held(&rb->w_lock); - - /* Update l2p only as buffer entries are being overwritten */ - space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries); - if (space > nr_entries) - goto out; - - count = nr_entries - space; - /* l2p_update used exclusively under rb->w_lock */ - ret = __pblk_rb_update_l2p(rb, count); - -out: - return ret; -} - -/* - * Update the l2p entry for all sectors stored on the write buffer. This means - * that all future lookups to the l2p table will point to a device address, not - * to the cacheline in the write buffer. - */ -void pblk_rb_sync_l2p(struct pblk_rb *rb) -{ - unsigned int sync; - unsigned int to_update; - - spin_lock(&rb->w_lock); - - /* Protect from reads and writes */ - sync = smp_load_acquire(&rb->sync); - - to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries); - __pblk_rb_update_l2p(rb, to_update); - - spin_unlock(&rb->w_lock); -} - -/* - * Write @nr_entries to ring buffer from @data buffer if there is enough space. - * Typically, 4KB data chunks coming from a bio will be copied to the ring - * buffer, thus the write will fail if not all incoming data can be copied. - * - */ -static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, - struct pblk_rb_entry *entry) -{ - memcpy(entry->data, data, rb->seg_size); - - entry->w_ctx.lba = w_ctx.lba; - entry->w_ctx.ppa = w_ctx.ppa; -} - -void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, unsigned int ring_pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entry; - int flags; - - entry = &rb->entries[ring_pos]; - flags = READ_ONCE(entry->w_ctx.flags); -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Caller must guarantee that the entry is free */ - BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); -#endif - - __pblk_rb_write_entry(rb, data, w_ctx, entry); - - pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline); - flags = w_ctx.flags | PBLK_WRITTEN_DATA; - - /* Release flags on write context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); -} - -void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *line, - u64 paddr, unsigned int ring_pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entry; - int flags; - - entry = &rb->entries[ring_pos]; - flags = READ_ONCE(entry->w_ctx.flags); -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Caller must guarantee that the entry is free */ - BUG_ON(!(flags & PBLK_WRITABLE_ENTRY)); -#endif - - __pblk_rb_write_entry(rb, data, w_ctx, entry); - - if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr)) - entry->w_ctx.lba = ADDR_EMPTY; - - flags = w_ctx.flags | PBLK_WRITTEN_DATA; - - /* Release flags on write context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); -} - -static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, - unsigned int pos) -{ - struct pblk_rb_entry *entry; - unsigned int sync, flush_point; - - pblk_rb_sync_init(rb, NULL); - sync = READ_ONCE(rb->sync); - - if (pos == sync) { - pblk_rb_sync_end(rb, NULL); - return 0; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_inc(&rb->inflight_flush_point); -#endif - - flush_point = (pos == 0) ? 
(rb->nr_entries - 1) : (pos - 1); - entry = &rb->entries[flush_point]; - - /* Protect flush points */ - smp_store_release(&rb->flush_point, flush_point); - - if (bio) - bio_list_add(&entry->w_ctx.bios, bio); - - pblk_rb_sync_end(rb, NULL); - - return bio ? 1 : 0; -} - -static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos) -{ - unsigned int mem; - unsigned int sync; - unsigned int threshold; - - sync = READ_ONCE(rb->sync); - mem = READ_ONCE(rb->mem); - - threshold = nr_entries + rb->back_thres; - - if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold) - return 0; - - if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) - return 0; - - *pos = mem; - - return 1; -} - -static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos) -{ - if (!__pblk_rb_may_write(rb, nr_entries, pos)) - return 0; - - /* Protect from read count */ - smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries)); - return 1; -} - -void pblk_rb_flush(struct pblk_rb *rb) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - unsigned int mem = READ_ONCE(rb->mem); - - if (pblk_rb_flush_point_set(rb, NULL, mem)) - return; - - pblk_write_kick(pblk); -} - -static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos, struct bio *bio, - int *io_ret) -{ - unsigned int mem; - - if (!__pblk_rb_may_write(rb, nr_entries, pos)) - return 0; - - mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries); - *io_ret = NVM_IO_DONE; - - if (bio->bi_opf & REQ_PREFLUSH) { - struct pblk *pblk = container_of(rb, struct pblk, rwb); - - atomic64_inc(&pblk->nr_flush); - if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem)) - *io_ret = NVM_IO_OK; - } - - /* Protect from read count */ - smp_store_release(&rb->mem, mem); - - return 1; -} - -/* - * Atomically check that (i) there is space on the write buffer for the - * incoming I/O, and (ii) the current I/O type has enough budget in the write - * buffer (rate-limiter). - */ -int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, - unsigned int nr_entries, unsigned int *pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - int io_ret; - - spin_lock(&rb->w_lock); - io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries); - if (io_ret) { - spin_unlock(&rb->w_lock); - return io_ret; - } - - if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) { - spin_unlock(&rb->w_lock); - return NVM_IO_REQUEUE; - } - - pblk_rl_user_in(&pblk->rl, nr_entries); - spin_unlock(&rb->w_lock); - - return io_ret; -} - -/* - * Look at pblk_rb_may_write_user comment - */ -int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - - spin_lock(&rb->w_lock); - if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) { - spin_unlock(&rb->w_lock); - return 0; - } - - if (!pblk_rb_may_write(rb, nr_entries, pos)) { - spin_unlock(&rb->w_lock); - return 0; - } - - pblk_rl_gc_in(&pblk->rl, nr_entries); - spin_unlock(&rb->w_lock); - - return 1; -} - -/* - * Read available entries on rb and add them to the given bio. To avoid a memory - * copy, a page reference to the write buffer is used to be added to the bio. - * - * This function is used by the write thread to form the write bio that will - * persist data on the write buffer to the media. 
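The admission check in __pblk_rb_may_write() above refuses a write unless back_thres entries would remain free behind it, so flush and padding traffic always has room. A compilable sketch of just that check (simplified; no locking and no L2P update):

    #include <assert.h>

    /* sketch of the __pblk_rb_may_write() space test; size is a power of
     * two, mem is the producer index, sync the last on-media entry */
    static unsigned int rb_space(unsigned int mem, unsigned int sync,
                                 unsigned int size)
    {
            return (sync - mem - 1) & (size - 1);
    }

    static int may_write(unsigned int mem, unsigned int sync,
                         unsigned int size, unsigned int back_thres,
                         unsigned int nr_entries, unsigned int *pos)
    {
            if (rb_space(mem, sync, size) < nr_entries + back_thres)
                    return 0;       /* caller requeues the I/O */
            *pos = mem;             /* entries land at the old mem index */
            return 1;
    }

    int main(void)
    {
            unsigned int pos;

            /* 16-entry ring with 10 in flight leaves 5 free slots: a
             * 2-entry write fails with reserve 4 (5 < 6), passes with 2 */
            assert(!may_write(10, 0, 16, 4, 2, &pos));
            assert(may_write(10, 0, 16, 2, 2, &pos) && pos == 10);
            return 0;
    }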
- */ -unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - unsigned int pos, unsigned int nr_entries, - unsigned int count) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct request_queue *q = pblk->dev->q; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = rqd->bio; - struct pblk_rb_entry *entry; - struct page *page; - unsigned int pad = 0, to_read = nr_entries; - unsigned int i; - int flags; - - if (count < nr_entries) { - pad = nr_entries - count; - to_read = count; - } - - /* Add space for packed metadata if in use*/ - pad += (pblk->min_write_pgs - pblk->min_write_pgs_data); - - c_ctx->sentry = pos; - c_ctx->nr_valid = to_read; - c_ctx->nr_padded = pad; - - for (i = 0; i < to_read; i++) { - entry = &rb->entries[pos]; - - /* A write has been allowed into the buffer, but data is still - * being copied to it. It is ok to busy wait. - */ -try: - flags = READ_ONCE(entry->w_ctx.flags); - if (!(flags & PBLK_WRITTEN_DATA)) { - io_schedule(); - goto try; - } - - page = virt_to_page(entry->data); - if (!page) { - pblk_err(pblk, "could not allocate write bio page\n"); - flags &= ~PBLK_WRITTEN_DATA; - flags |= PBLK_SUBMITTED_ENTRY; - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - return NVM_IO_ERR; - } - - if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) != - rb->seg_size) { - pblk_err(pblk, "could not add page to write bio\n"); - flags &= ~PBLK_WRITTEN_DATA; - flags |= PBLK_SUBMITTED_ENTRY; - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - return NVM_IO_ERR; - } - - flags &= ~PBLK_WRITTEN_DATA; - flags |= PBLK_SUBMITTED_ENTRY; - - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - - pos = pblk_rb_ptr_wrap(rb, pos, 1); - } - - if (pad) { - if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) { - pblk_err(pblk, "could not pad page in write bio\n"); - return NVM_IO_ERR; - } - - if (pad < pblk->min_write_pgs) - atomic64_inc(&pblk->pad_dist[pad - 1]); - else - pblk_warn(pblk, "padding more than min. sectors\n"); - - atomic64_add(pad, &pblk->pad_wa); - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(pad, &pblk->padded_writes); -#endif - - return NVM_IO_OK; -} - -/* - * Copy to bio only if the lba matches the one on the given cache entry. - * Otherwise, it means that the entry has been overwritten, and the bio should - * be directed to disk. 
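The flags handling above is a release/acquire handshake: the producer fills entry->data and publishes PBLK_WRITTEN_DATA with smp_store_release(), while the write thread polls the flag (parking with io_schedule() between polls) before it may touch the data. A C11 sketch of the same pattern (standalone; the acquire load stands in for the driver's READ_ONCE() poll):

    #include <stdatomic.h>
    #include <string.h>

    /* illustrative sketch of the buffer-entry handoff, not driver code */
    enum { ENTRY_WRITTEN = 1, ENTRY_SUBMITTED = 2 };

    struct entry {
            char data[4096];
            _Atomic int flags;
    };

    static void fill_entry(struct entry *e, const char *src, size_t len)
    {
            memcpy(e->data, src, len);
            /* release: the copy above is visible before the flag flips */
            atomic_store_explicit(&e->flags, ENTRY_WRITTEN,
                                  memory_order_release);
    }

    static void consume_entry(struct entry *e)
    {
            /* acquire: once the flag is seen, the data is safe to read */
            while (!(atomic_load_explicit(&e->flags, memory_order_acquire)
                     & ENTRY_WRITTEN))
                    ;       /* the driver parks with io_schedule() here */
            /* ... hand e->data to the write bio ... */
            atomic_store_explicit(&e->flags, ENTRY_SUBMITTED,
                                  memory_order_release);
    }

    int main(void)
    {
            static struct entry e;

            fill_entry(&e, "data", 5);
            consume_entry(&e);      /* returns at once: flag already set */
            return atomic_load(&e.flags) == ENTRY_SUBMITTED ? 0 : 1;
    }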
- */ -int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, - struct ppa_addr ppa) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_rb_entry *entry; - struct pblk_w_ctx *w_ctx; - struct ppa_addr l2p_ppa; - u64 pos = pblk_addr_to_cacheline(ppa); - void *data; - int flags; - int ret = 1; - - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Caller must ensure that the access will not cause an overflow */ - BUG_ON(pos >= rb->nr_entries); -#endif - entry = &rb->entries[pos]; - w_ctx = &entry->w_ctx; - flags = READ_ONCE(w_ctx->flags); - - spin_lock(&rb->w_lock); - spin_lock(&pblk->trans_lock); - l2p_ppa = pblk_trans_map_get(pblk, lba); - spin_unlock(&pblk->trans_lock); - - /* Check if the entry has been overwritten or is scheduled to be */ - if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba || - flags & PBLK_WRITABLE_ENTRY) { - ret = 0; - goto out; - } - data = bio_data(bio); - memcpy(data, entry->data, rb->seg_size); - -out: - spin_unlock(&rb->w_lock); - return ret; -} - -struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) -{ - unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0); - - return &rb->entries[entry].w_ctx; -} - -unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags) - __acquires(&rb->s_lock) -{ - if (flags) - spin_lock_irqsave(&rb->s_lock, *flags); - else - spin_lock_irq(&rb->s_lock); - - return rb->sync; -} - -void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags) - __releases(&rb->s_lock) -{ - lockdep_assert_held(&rb->s_lock); - - if (flags) - spin_unlock_irqrestore(&rb->s_lock, *flags); - else - spin_unlock_irq(&rb->s_lock); -} - -unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) -{ - unsigned int sync, flush_point; - lockdep_assert_held(&rb->s_lock); - - sync = READ_ONCE(rb->sync); - flush_point = READ_ONCE(rb->flush_point); - - if (flush_point != EMPTY_ENTRY) { - unsigned int secs_to_flush; - - secs_to_flush = pblk_rb_ring_count(flush_point, sync, - rb->nr_entries); - if (secs_to_flush < nr_entries) { - /* Protect flush points */ - smp_store_release(&rb->flush_point, EMPTY_ENTRY); - } - } - - sync = pblk_rb_ptr_wrap(rb, sync, nr_entries); - - /* Protect from counts */ - smp_store_release(&rb->sync, sync); - - return sync; -} - -/* Calculate how many sectors to submit up to the current flush point. */ -unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) -{ - unsigned int subm, sync, flush_point; - unsigned int submitted, to_flush; - - /* Protect flush points */ - flush_point = smp_load_acquire(&rb->flush_point); - if (flush_point == EMPTY_ENTRY) - return 0; - - /* Protect syncs */ - sync = smp_load_acquire(&rb->sync); - - subm = READ_ONCE(rb->subm); - submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries); - - /* The sync point itself counts as a sector to sync */ - to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1; - - return (submitted < to_flush) ? 
(to_flush - submitted) : 0; -} - -int pblk_rb_tear_down_check(struct pblk_rb *rb) -{ - struct pblk_rb_entry *entry; - int i; - int ret = 0; - - spin_lock(&rb->w_lock); - spin_lock_irq(&rb->s_lock); - - if ((rb->mem == rb->subm) && (rb->subm == rb->sync) && - (rb->sync == rb->l2p_update) && - (rb->flush_point == EMPTY_ENTRY)) { - goto out; - } - - if (!rb->entries) { - ret = 1; - goto out; - } - - for (i = 0; i < rb->nr_entries; i++) { - entry = &rb->entries[i]; - - if (!entry->data) { - ret = 1; - goto out; - } - } - -out: - spin_unlock_irq(&rb->s_lock); - spin_unlock(&rb->w_lock); - - return ret; -} - -unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos) -{ - return (pos & (rb->nr_entries - 1)); -} - -int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos) -{ - return (pos >= rb->nr_entries); -} - -ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf) -{ - struct pblk *pblk = container_of(rb, struct pblk, rwb); - struct pblk_c_ctx *c; - ssize_t offset; - int queued_entries = 0; - - spin_lock_irq(&rb->s_lock); - list_for_each_entry(c, &pblk->compl_list, list) - queued_entries++; - spin_unlock_irq(&rb->s_lock); - - if (rb->flush_point != EMPTY_ENTRY) - offset = scnprintf(buf, PAGE_SIZE, - "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n", - rb->nr_entries, - rb->mem, - rb->subm, - rb->sync, - rb->l2p_update, -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_read(&rb->inflight_flush_point), -#else - 0, -#endif - rb->flush_point, - pblk_rb_read_count(rb), - pblk_rb_space(rb), - pblk_rb_flush_point_count(rb), - queued_entries); - else - offset = scnprintf(buf, PAGE_SIZE, - "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n", - rb->nr_entries, - rb->mem, - rb->subm, - rb->sync, - rb->l2p_update, -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_read(&rb->inflight_flush_point), -#else - 0, -#endif - pblk_rb_read_count(rb), - pblk_rb_space(rb), - pblk_rb_flush_point_count(rb), - queued_entries); - - return offset; -} diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c deleted file mode 100644 index c28537a489bc..000000000000 --- a/drivers/lightnvm/pblk-read.c +++ /dev/null @@ -1,474 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-read.c - pblk's read path - */ - -#include "pblk.h" - -/* - * There is no guarantee that the value read from cache has not been updated and - * resides at another location in the cache. We guarantee though that if the - * value is read from the cache, it belongs to the mapped lba. In order to - * guarantee and order between writes and reads are ordered, a flush must be - * issued. 
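Per that guarantee, a buffered read is served only while the entry still belongs to the requested LBA. A standalone sketch of the three conditions pblk_rb_copy_to_bio() checks before copying (WRITABLE_ENTRY stands in for PBLK_WRITABLE_ENTRY):

    #include <assert.h>
    #include <stdint.h>

    #define WRITABLE_ENTRY 1 /* sketch; entry is free to be overwritten */

    struct w_ctx { uint64_t lba; int flags; };

    /* valid only if the L2P still maps lba to this cache line, the entry
     * still carries that lba, and it is not already marked writable */
    static int cache_hit_valid(uint64_t l2p_ppa, uint64_t cache_ppa,
                               const struct w_ctx *w, uint64_t lba)
    {
            return l2p_ppa == cache_ppa && w->lba == lba &&
                   !(w->flags & WRITABLE_ENTRY);
    }

    int main(void)
    {
            struct w_ctx w = { .lba = 7, .flags = 0 };

            assert(cache_hit_valid(100, 100, &w, 7));
            w.flags = WRITABLE_ENTRY;   /* entry scheduled for reuse */
            assert(!cache_hit_valid(100, 100, &w, 7));
            return 0;
    }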
- */ -static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, - sector_t lba, struct ppa_addr ppa) -{ -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(pblk_ppa_empty(ppa)); - BUG_ON(!pblk_addr_in_cache(ppa)); -#endif - - return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa); -} - -static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct bio *bio, sector_t blba, - bool *from_cache) -{ - void *meta_list = rqd->meta_list; - int nr_secs, i; - -retry: - nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas, - from_cache); - - if (!*from_cache) - goto end; - - for (i = 0; i < nr_secs; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - sector_t lba = blba + i; - - if (pblk_ppa_empty(rqd->ppa_list[i])) { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - meta->lba = addr_empty; - } else if (pblk_addr_in_cache(rqd->ppa_list[i])) { - /* - * Try to read from write buffer. The address is later - * checked on the write buffer to prevent retrieving - * overwritten data. - */ - if (!pblk_read_from_cache(pblk, bio, lba, - rqd->ppa_list[i])) { - if (i == 0) { - /* - * We didn't call with bio_advance() - * yet, so we can just retry. - */ - goto retry; - } else { - /* - * We already call bio_advance() - * so we cannot retry and we need - * to quit that function in order - * to allow caller to handle the bio - * splitting in the current sector - * position. - */ - nr_secs = i; - goto end; - } - } - meta->lba = cpu_to_le64(lba); -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->cache_reads); -#endif - } - bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); - } - -end: - if (pblk_io_aligned(pblk, nr_secs)) - rqd->is_seq = 1; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(nr_secs, &pblk->inflight_reads); -#endif - - return nr_secs; -} - - -static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, - sector_t blba) -{ - void *meta_list = rqd->meta_list; - int nr_lbas = rqd->nr_ppas; - int i; - - if (!pblk_is_oob_meta_supported(pblk)) - return; - - for (i = 0; i < nr_lbas; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - u64 lba = le64_to_cpu(meta->lba); - - if (lba == ADDR_EMPTY) - continue; - - if (lba != blba + i) { -#ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - print_ppa(pblk, &ppa_list[i], "seq", i); -#endif - pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", - lba, (u64)blba + i); - WARN_ON(1); - } - } -} - -/* - * There can be holes in the lba list. 
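The sequential check above relies on each sector's out-of-band metadata carrying the LBA it was written for. A standalone sketch of that verification (ADDR_EMPTY marks holes that were never mapped):

    #include <stdio.h>

    #define ADDR_EMPTY (~0ULL)  /* sketch of the driver's empty marker */

    /* entry i must be a hole or match the expected start LBA plus i */
    static int check_seq(const unsigned long long *meta_lba, int n,
                         unsigned long long blba)
    {
            int i, bad = 0;

            for (i = 0; i < n; i++) {
                    if (meta_lba[i] == ADDR_EMPTY)  /* hole: never mapped */
                            continue;
                    if (meta_lba[i] != blba + i) {
                            fprintf(stderr, "corrupted LBA (%llu/%llu)\n",
                                    meta_lba[i], blba + i);
                            bad++;
                    }
            }
            return bad;
    }

    int main(void)
    {
            unsigned long long m[4] = { 100, ADDR_EMPTY, 102, 9 };

            return check_seq(m, 4, 100) == 1 ? 0 : 1; /* one bad entry */
    }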
- */ -static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, - u64 *lba_list, int nr_lbas) -{ - void *meta_lba_list = rqd->meta_list; - int i, j; - - if (!pblk_is_oob_meta_supported(pblk)) - return; - - for (i = 0, j = 0; i < nr_lbas; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, - meta_lba_list, j); - u64 lba = lba_list[i]; - u64 meta_lba; - - if (lba == ADDR_EMPTY) - continue; - - meta_lba = le64_to_cpu(meta->lba); - - if (lba != meta_lba) { -#ifdef CONFIG_NVM_PBLK_DEBUG - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - print_ppa(pblk, &ppa_list[j], "rnd", j); -#endif - pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n", - meta_lba, lba); - WARN_ON(1); - } - - j++; - } - - WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); -} - -static void pblk_end_user_read(struct bio *bio, int error) -{ - if (error && error != NVM_RSP_WARN_HIGHECC) - bio_io_error(bio); - else - bio_endio(bio); -} - -static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, - bool put_line) -{ - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct bio *int_bio = rqd->bio; - unsigned long start_time = r_ctx->start_time; - - bio_end_io_acct(int_bio, start_time); - - if (rqd->error) - pblk_log_read_err(pblk, rqd); - - pblk_read_check_seq(pblk, rqd, r_ctx->lba); - bio_put(int_bio); - - if (put_line) - pblk_rq_to_line_put(pblk, rqd); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); - atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); -#endif - - pblk_free_rqd(pblk, rqd, PBLK_READ); - atomic_dec(&pblk->inflight_io); -} - -static void pblk_end_io_read(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = (struct bio *)r_ctx->private; - - pblk_end_user_read(bio, rqd->error); - __pblk_end_io_read(pblk, rqd, true); -} - -static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, - sector_t lba, bool *from_cache) -{ - struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); - struct ppa_addr ppa; - - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->inflight_reads); -#endif - -retry: - if (pblk_ppa_empty(ppa)) { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - meta->lba = addr_empty; - return; - } - - /* Try to read from write buffer. The address is later checked on the - * write buffer to prevent retrieving overwritten data. 
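pblk_end_user_read() above also encodes the completion policy: a read flagged with the high-ECC warning still returned good data, so only other errors fail the bio. A small sketch (the status value here is illustrative, standing in for NVM_RSP_WARN_HIGHECC):

    #include <assert.h>

    #define WARN_HIGHECC 0x4700 /* illustrative stand-in value */

    static int bio_should_fail(int error)
    {
            return error && error != WARN_HIGHECC;
    }

    int main(void)
    {
            assert(!bio_should_fail(0));            /* clean read */
            assert(!bio_should_fail(WARN_HIGHECC)); /* data ok, media aging */
            assert(bio_should_fail(-5));            /* real failure, -EIO */
            return 0;
    }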
- */ - if (pblk_addr_in_cache(ppa)) { - if (!pblk_read_from_cache(pblk, bio, lba, ppa)) { - pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache); - goto retry; - } - - meta->lba = cpu_to_le64(lba); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->cache_reads); -#endif - } else { - rqd->ppa_addr = ppa; - } -} - -void pblk_submit_read(struct pblk *pblk, struct bio *bio) -{ - sector_t blba = pblk_get_lba(bio); - unsigned int nr_secs = pblk_get_secs(bio); - bool from_cache; - struct pblk_g_ctx *r_ctx; - struct nvm_rq *rqd; - struct bio *int_bio, *split_bio; - unsigned long start_time; - - start_time = bio_start_io_acct(bio); - - rqd = pblk_alloc_rqd(pblk, PBLK_READ); - - rqd->opcode = NVM_OP_PREAD; - rqd->nr_ppas = nr_secs; - rqd->private = pblk; - rqd->end_io = pblk_end_io_read; - - r_ctx = nvm_rq_to_pdu(rqd); - r_ctx->start_time = start_time; - r_ctx->lba = blba; - - if (pblk_alloc_rqd_meta(pblk, rqd)) { - bio_io_error(bio); - pblk_free_rqd(pblk, rqd, PBLK_READ); - return; - } - - /* Clone read bio to deal internally with: - * -read errors when reading from drive - * -bio_advance() calls during cache reads - */ - int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); - - if (nr_secs > 1) - nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba, - &from_cache); - else - pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache); - -split_retry: - r_ctx->private = bio; /* original bio */ - rqd->bio = int_bio; /* internal bio */ - - if (from_cache && nr_secs == rqd->nr_ppas) { - /* All data was read from cache, we can complete the IO. */ - pblk_end_user_read(bio, 0); - atomic_inc(&pblk->inflight_io); - __pblk_end_io_read(pblk, rqd, false); - } else if (nr_secs != rqd->nr_ppas) { - /* The read bio request could be partially filled by the write - * buffer, but there are some holes that need to be read from - * the drive. In order to handle this, we will use block layer - * mechanism to split this request in to smaller ones and make - * a chain of it. - */ - split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL, - &pblk_bio_set); - bio_chain(split_bio, bio); - submit_bio_noacct(bio); - - /* New bio contains first N sectors of the previous one, so - * we can continue to use existing rqd, but we need to shrink - * the number of PPAs in it. New bio is also guaranteed that - * it contains only either data from cache or from drive, newer - * mix of them. - */ - bio = split_bio; - rqd->nr_ppas = nr_secs; - if (rqd->nr_ppas == 1) - rqd->ppa_addr = rqd->ppa_list[0]; - - /* Recreate int_bio - existing might have some needed internal - * fields modified already. 
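The split loop above resubmits a partially cached read in runs that are wholly in the write buffer or wholly on media. A standalone model of that control flow (resolve_prefix() is a simplified stand-in for pblk_lookup_l2p_seq()):

    #include <stdio.h>

    /* length of the leading run of sectors sharing the same location */
    static int resolve_prefix(const int *in_cache, int n)
    {
            int i = 1;

            while (i < n && in_cache[i] == in_cache[0])
                    i++;
            return i;
    }

    int main(void)
    {
            int in_cache[] = { 1, 1, 0, 0, 0, 1 };
            int n = 6, off = 0;

            while (off < n) {
                    int run = resolve_prefix(in_cache + off, n - off);

                    printf("submit sectors %d..%d from %s\n",
                           off, off + run - 1,
                           in_cache[off] ? "write buffer" : "media");
                    off += run; /* bio_split()/bio_chain() in the driver */
            }
            return 0;
    }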
- */ - bio_put(int_bio); - int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); - goto split_retry; - } else if (pblk_submit_io(pblk, rqd, NULL)) { - /* Submitting IO to drive failed, let's report an error */ - rqd->error = -ENODEV; - pblk_end_io_read(rqd); - } -} - -static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_line *line, u64 *lba_list, - u64 *paddr_list_gc, unsigned int nr_secs) -{ - struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA]; - struct ppa_addr ppa_gc; - int valid_secs = 0; - int i; - - pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs); - - for (i = 0; i < nr_secs; i++) { - if (lba_list[i] == ADDR_EMPTY) - continue; - - ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id); - if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) { - paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY; - continue; - } - - rqd->ppa_list[valid_secs++] = ppa_list_l2p[i]; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(valid_secs, &pblk->inflight_reads); -#endif - - return valid_secs; -} - -static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_line *line, sector_t lba, - u64 paddr_gc) -{ - struct ppa_addr ppa_l2p, ppa_gc; - int valid_secs = 0; - - if (lba == ADDR_EMPTY) - goto out; - - /* logic error: lba out-of-bounds */ - if (lba >= pblk->capacity) { - WARN(1, "pblk: read lba out of bounds\n"); - goto out; - } - - spin_lock(&pblk->trans_lock); - ppa_l2p = pblk_trans_map_get(pblk, lba); - spin_unlock(&pblk->trans_lock); - - ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id); - if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) - goto out; - - rqd->ppa_addr = ppa_l2p; - valid_secs = 1; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_inc(&pblk->inflight_reads); -#endif - -out: - return valid_secs; -} - -int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) -{ - struct nvm_rq rqd; - int ret = NVM_IO_OK; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = pblk_alloc_rqd_meta(pblk, &rqd); - if (ret) - return ret; - - if (gc_rq->nr_secs > 1) { - gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, - gc_rq->lba_list, - gc_rq->paddr_list, - gc_rq->nr_secs); - if (gc_rq->secs_to_gc == 1) - rqd.ppa_addr = rqd.ppa_list[0]; - } else { - gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line, - gc_rq->lba_list[0], - gc_rq->paddr_list[0]); - } - - if (!(gc_rq->secs_to_gc)) - goto out; - - rqd.opcode = NVM_OP_PREAD; - rqd.nr_ppas = gc_rq->secs_to_gc; - - if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) { - ret = -EIO; - goto err_free_dma; - } - - pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs); - - atomic_dec(&pblk->inflight_io); - - if (rqd.error) { - atomic_long_inc(&pblk->read_failed_gc); -#ifdef CONFIG_NVM_PBLK_DEBUG - pblk_print_failed_rqd(pblk, &rqd, rqd.error); -#endif - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads); - atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads); - atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads); -#endif - -out: - pblk_free_rqd_meta(pblk, &rqd); - return ret; - -err_free_dma: - pblk_free_rqd_meta(pblk, &rqd); - return ret; -} diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c deleted file mode 100644 index 0e6f0c76e930..000000000000 --- a/drivers/lightnvm/pblk-recovery.c +++ /dev/null @@ -1,874 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial: Javier Gonzalez <javier@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify 
it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-recovery.c - pblk's recovery path - * - * The L2P recovery path is single threaded as the L2P table is updated in order - * following the line sequence ID. - */ - -#include "pblk.h" -#include "pblk-trace.h" - -int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) -{ - u32 crc; - - crc = pblk_calc_emeta_crc(pblk, emeta_buf); - if (le32_to_cpu(emeta_buf->crc) != crc) - return 1; - - if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) - return 1; - - return 0; -} - -static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_emeta *emeta = line->emeta; - struct line_emeta *emeta_buf = emeta->buf; - __le64 *lba_list; - u64 data_start, data_end; - u64 nr_valid_lbas, nr_lbas = 0; - u64 i; - - lba_list = emeta_to_lbas(pblk, emeta_buf); - if (!lba_list) - return 1; - - data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - data_end = line->emeta_ssec; - nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); - - for (i = data_start; i < data_end; i++) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, i, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - /* Do not update bad blocks */ - if (test_bit(pos, line->blk_bitmap)) - continue; - - if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) { - spin_lock(&line->lock); - if (test_and_set_bit(i, line->invalid_bitmap)) - WARN_ONCE(1, "pblk: rec. double invalidate:\n"); - else - le32_add_cpu(line->vsc, -1); - spin_unlock(&line->lock); - - continue; - } - - pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa); - nr_lbas++; - } - - if (nr_valid_lbas != nr_lbas) - pblk_err(pblk, "line %d - inconsistent lba list(%llu/%llu)\n", - line->id, nr_valid_lbas, nr_lbas); - - line->left_msecs = 0; - - return 0; -} - -static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, - u64 written_secs) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int i; - - for (i = 0; i < written_secs; i += pblk->min_write_pgs) - __pblk_alloc_page(pblk, line, pblk->min_write_pgs); - - spin_lock(&l_mg->free_lock); - if (written_secs > line->left_msecs) { - /* - * We have all data sectors written - * and some emeta sectors written too. - */ - line->left_msecs = 0; - } else { - /* We have only some data sectors written. 
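The accounting here recovers how far an open line got by summing the write pointers of its usable chunks and shrinking the line's remaining budget accordingly. A standalone sketch (chunk state reduced to an offline flag):

    #include <assert.h>

    /* sketch of the recovery accounting, not driver code */
    struct chunk { int offline; unsigned long long wp; };

    static unsigned long long written_secs(const struct chunk *c, int n)
    {
            unsigned long long secs = 0;
            int i;

            for (i = 0; i < n; i++)
                    if (!c[i].offline)
                            secs += c[i].wp; /* sectors written so far */
            return secs;
    }

    int main(void)
    {
            struct chunk chks[3] = { {0, 96}, {1, 0}, {0, 64} };
            unsigned long long left_msecs = 1000;
            unsigned long long w = written_secs(chks, 3);

            left_msecs = w > left_msecs ? 0 : left_msecs - w;
            assert(w == 160 && left_msecs == 840);
            return 0;
    }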
*/ - line->left_msecs -= written_secs; - } - spin_unlock(&l_mg->free_lock); -} - -static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); - u64 written_secs = 0; - int valid_chunks = 0; - int i; - - for (i = 0; i < lm->blk_per_line; i++) { - struct nvm_chk_meta *chunk = &line->chks[i]; - - if (chunk->state & NVM_CHK_ST_OFFLINE) - continue; - - written_secs += chunk->wp; - valid_chunks++; - } - - if (lm->blk_per_line - nr_bb != valid_chunks) - pblk_err(pblk, "recovery line %d is bad\n", line->id); - - pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec); - - return written_secs; -} - -struct pblk_recov_alloc { - struct ppa_addr *ppa_list; - void *meta_list; - struct nvm_rq *rqd; - void *data; - dma_addr_t dma_ppa_list; - dma_addr_t dma_meta_list; -}; - -static void pblk_recov_complete(struct kref *ref) -{ - struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref); - - complete(&pad_rq->wait); -} - -static void pblk_end_io_recov(struct nvm_rq *rqd) -{ - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - struct pblk_pad_rq *pad_rq = rqd->private; - struct pblk *pblk = pad_rq->pblk; - - pblk_up_chunk(pblk, ppa_list[0]); - - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - - atomic_dec(&pblk->inflight_io); - kref_put(&pad_rq->ref, pblk_recov_complete); -} - -/* pad line using line bitmap. */ -static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, - int left_ppas) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - void *meta_list; - struct pblk_pad_rq *pad_rq; - struct nvm_rq *rqd; - struct ppa_addr *ppa_list; - void *data; - __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); - u64 w_ptr = line->cur_sec; - int left_line_ppas, rq_ppas; - int i, j; - int ret = 0; - - spin_lock(&line->lock); - left_line_ppas = line->left_msecs; - spin_unlock(&line->lock); - - pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL); - if (!pad_rq) - return -ENOMEM; - - data = vzalloc(array_size(pblk->max_write_pgs, geo->csecs)); - if (!data) { - ret = -ENOMEM; - goto free_rq; - } - - pad_rq->pblk = pblk; - init_completion(&pad_rq->wait); - kref_init(&pad_rq->ref); - -next_pad_rq: - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); - if (rq_ppas < pblk->min_write_pgs) { - pblk_err(pblk, "corrupted pad line %d\n", line->id); - goto fail_complete; - } - - rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); - - ret = pblk_alloc_rqd_meta(pblk, rqd); - if (ret) { - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - goto fail_complete; - } - - rqd->bio = NULL; - rqd->opcode = NVM_OP_PWRITE; - rqd->is_seq = 1; - rqd->nr_ppas = rq_ppas; - rqd->end_io = pblk_end_io_recov; - rqd->private = pad_rq; - - ppa_list = nvm_rq_to_ppa_list(rqd); - meta_list = rqd->meta_list; - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - w_ptr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { - struct ppa_addr dev_ppa; - struct pblk_sec_meta *meta; - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - - dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - - pblk_map_invalidate(pblk, dev_ppa); - lba_list[w_ptr] = addr_empty; - meta = 
pblk_get_meta(pblk, meta_list, i); - meta->lba = addr_empty; - ppa_list[i] = dev_ppa; - } - } - - kref_get(&pad_rq->ref); - pblk_down_chunk(pblk, ppa_list[0]); - - ret = pblk_submit_io(pblk, rqd, data); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - pblk_up_chunk(pblk, ppa_list[0]); - kref_put(&pad_rq->ref, pblk_recov_complete); - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - goto fail_complete; - } - - left_line_ppas -= rq_ppas; - left_ppas -= rq_ppas; - if (left_ppas && left_line_ppas) - goto next_pad_rq; - -fail_complete: - kref_put(&pad_rq->ref, pblk_recov_complete); - wait_for_completion(&pad_rq->wait); - - if (!pblk_line_is_full(line)) - pblk_err(pblk, "corrupted padded line: %d\n", line->id); - - vfree(data); -free_rq: - kfree(pad_rq); - return ret; -} - -static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt; - - return (distance > line->left_msecs) ? line->left_msecs : distance; -} - -/* Return a chunk belonging to a line by stripe(write order) index */ -static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk, - struct pblk_line *line, - int index) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - struct ppa_addr ppa; - int pos; - - rlun = &pblk->luns[index]; - ppa = rlun->bppa; - pos = pblk_ppa_to_pos(geo, ppa); - - return &line->chks[pos]; -} - -static int pblk_line_wps_are_unbalanced(struct pblk *pblk, - struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int blk_in_line = lm->blk_per_line; - struct nvm_chk_meta *chunk; - u64 max_wp, min_wp; - int i; - - i = find_first_zero_bit(line->blk_bitmap, blk_in_line); - - /* If there is one or zero good chunks in the line, - * the write pointers can't be unbalanced. 
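pblk_pad_distance() above sizes the retry padding so earlier sectors leave every LUN's open write window, capped by what is left in the line. A standalone sketch with made-up geometry numbers:

    #include <assert.h>

    /* sketch of pblk_pad_distance(); geometry values are invented */
    static int pad_distance(int mw_cunits, int all_luns, int ws_opt,
                            int left_msecs)
    {
            int distance = mw_cunits * all_luns * ws_opt;

            return distance > left_msecs ? left_msecs : distance;
    }

    int main(void)
    {
            /* e.g. 4 cache units x 8 LUNs x 12 optimal-write sectors */
            assert(pad_distance(4, 8, 12, 1000) == 384);
            assert(pad_distance(4, 8, 12, 100) == 100); /* line nearly full */
            return 0;
    }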
- */ - if (i >= (blk_in_line - 1)) - return 0; - - chunk = pblk_get_stripe_chunk(pblk, line, i); - max_wp = chunk->wp; - if (max_wp > pblk->max_write_pgs) - min_wp = max_wp - pblk->max_write_pgs; - else - min_wp = 0; - - i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1); - while (i < blk_in_line) { - chunk = pblk_get_stripe_chunk(pblk, line, i); - if (chunk->wp > max_wp || chunk->wp < min_wp) - return 1; - - i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1); - } - - return 0; -} - -static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, - struct pblk_recov_alloc p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppa_list; - void *meta_list; - struct nvm_rq *rqd; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - __le64 *lba_list; - u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - bool padded = false; - int rq_ppas; - int i, j; - int ret; - u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec; - - if (pblk_line_wps_are_unbalanced(pblk, line)) - pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); - - ppa_list = p.ppa_list; - meta_list = p.meta_list; - rqd = p.rqd; - data = p.data; - dma_ppa_list = p.dma_ppa_list; - dma_meta_list = p.dma_meta_list; - - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - -next_rq: - memset(rqd, 0, pblk_g_rq_size); - - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); - if (!rq_ppas) - rq_ppas = pblk->min_write_pgs; - -retry_rq: - rqd->bio = NULL; - rqd->opcode = NVM_OP_PREAD; - rqd->meta_list = meta_list; - rqd->nr_ppas = rq_ppas; - rqd->ppa_list = ppa_list; - rqd->dma_ppa_list = dma_ppa_list; - rqd->dma_meta_list = dma_meta_list; - ppa_list = nvm_rq_to_ppa_list(rqd); - - if (pblk_io_aligned(pblk, rq_ppas)) - rqd->is_seq = 1; - - for (i = 0; i < rqd->nr_ppas; ) { - struct ppa_addr ppa; - int pos; - - ppa = addr_to_gen_ppa(pblk, paddr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - - while (test_bit(pos, line->blk_bitmap)) { - paddr += pblk->min_write_pgs; - ppa = addr_to_gen_ppa(pblk, paddr, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - } - - for (j = 0; j < pblk->min_write_pgs; j++, i++) - ppa_list[i] = - addr_to_gen_ppa(pblk, paddr + j, line->id); - } - - ret = pblk_submit_io_sync(pblk, rqd, data); - if (ret) { - pblk_err(pblk, "I/O submission failed: %d\n", ret); - return ret; - } - - atomic_dec(&pblk->inflight_io); - - /* If a read fails, do a best effort by padding the line and retrying */ - if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { - int pad_distance, ret; - - if (padded) { - pblk_log_read_err(pblk, rqd); - return -EINTR; - } - - pad_distance = pblk_pad_distance(pblk, line); - ret = pblk_recov_pad_line(pblk, line, pad_distance); - if (ret) { - return ret; - } - - padded = true; - goto retry_rq; - } - - pblk_get_packed_meta(pblk, rqd); - - for (i = 0; i < rqd->nr_ppas; i++) { - struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); - u64 lba = le64_to_cpu(meta->lba); - - lba_list[paddr++] = cpu_to_le64(lba); - - if (lba == ADDR_EMPTY || lba >= pblk->capacity) - continue; - - line->nr_valid_lbas++; - pblk_update_map(pblk, lba, ppa_list[i]); - } - - left_ppas -= rq_ppas; - if (left_ppas > 0) - goto next_rq; - -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ON(padded && !pblk_line_is_full(line)); -#endif - - return 0; -} - -/* Scan line for lbas on out of bound area */ -static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) -{ - struct 
nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_rq *rqd; - struct ppa_addr *ppa_list; - void *meta_list; - struct pblk_recov_alloc p; - void *data; - dma_addr_t dma_ppa_list, dma_meta_list; - int ret = 0; - - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) - return -ENOMEM; - - ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk); - dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); - - data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto free_meta_list; - } - - rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL); - memset(rqd, 0, pblk_g_rq_size); - - p.ppa_list = ppa_list; - p.meta_list = meta_list; - p.rqd = rqd; - p.data = data; - p.dma_ppa_list = dma_ppa_list; - p.dma_meta_list = dma_meta_list; - - ret = pblk_recov_scan_oob(pblk, line, p); - if (ret) { - pblk_err(pblk, "could not recover L2P form OOB\n"); - goto out; - } - - if (pblk_line_is_full(line)) - pblk_line_recov_close(pblk, line); - -out: - mempool_free(rqd, &pblk->r_rq_pool); - kfree(data); -free_meta_list: - nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); - - return ret; -} - -/* Insert lines ordered by sequence number (seq_num) on list */ -static void pblk_recov_line_add_ordered(struct list_head *head, - struct pblk_line *line) -{ - struct pblk_line *t = NULL; - - list_for_each_entry(t, head, list) - if (t->seq_nr > line->seq_nr) - break; - - __list_add(&line->list, t->list.prev, &t->list); -} - -static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - unsigned int emeta_secs; - u64 emeta_start; - struct ppa_addr ppa; - int pos; - - emeta_secs = lm->emeta_sec[0]; - emeta_start = lm->sec_per_line; - - while (emeta_secs) { - emeta_start--; - ppa = addr_to_gen_ppa(pblk, emeta_start, line->id); - pos = pblk_ppa_to_pos(geo, ppa); - if (!test_bit(pos, line->blk_bitmap)) - emeta_secs--; - } - - return emeta_start; -} - -static int pblk_recov_check_line_version(struct pblk *pblk, - struct line_emeta *emeta) -{ - struct line_header *header = &emeta->header; - - if (header->version_major != EMETA_VERSION_MAJOR) { - pblk_err(pblk, "line major version mismatch: %d, expected: %d\n", - header->version_major, EMETA_VERSION_MAJOR); - return 1; - } - -#ifdef CONFIG_NVM_PBLK_DEBUG - if (header->version_minor > EMETA_VERSION_MINOR) - pblk_info(pblk, "newer line minor version found: %d\n", - header->version_minor); -#endif - - return 0; -} - -static void pblk_recov_wa_counters(struct pblk *pblk, - struct line_emeta *emeta) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct line_header *header = &emeta->header; - struct wa_counters *wa = emeta_to_wa(lm, emeta); - - /* WA counters were introduced in emeta version 0.2 */ - if (header->version_major > 0 || header->version_minor >= 2) { - u64 user = le64_to_cpu(wa->user); - u64 pad = le64_to_cpu(wa->pad); - u64 gc = le64_to_cpu(wa->gc); - - atomic64_set(&pblk->user_wa, user); - atomic64_set(&pblk->pad_wa, pad); - atomic64_set(&pblk->gc_wa, gc); - - pblk->user_rst_wa = user; - pblk->pad_rst_wa = pad; - pblk->gc_rst_wa = gc; - } -} - -static int pblk_line_was_written(struct pblk_line *line, - struct pblk *pblk) -{ - - struct pblk_line_meta *lm = &pblk->lm; - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_chk_meta *chunk; - struct ppa_addr bppa; - int smeta_blk; - - if (line->state == 
PBLK_LINESTATE_BAD) - return 0; - - smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - if (smeta_blk >= lm->blk_per_line) - return 0; - - bppa = pblk->luns[smeta_blk].bppa; - chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; - - if (chunk->state & NVM_CHK_ST_CLOSED || - (chunk->state & NVM_CHK_ST_OPEN - && chunk->wp >= lm->smeta_sec)) - return 1; - - return 0; -} - -static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_meta *lm = &pblk->lm; - int i; - - for (i = 0; i < lm->blk_per_line; i++) - if (line->chks[i].state & NVM_CHK_ST_OPEN) - return true; - - return false; -} - -struct pblk_line *pblk_recov_l2p(struct pblk *pblk) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line, *tline, *data_line = NULL; - struct pblk_smeta *smeta; - struct pblk_emeta *emeta; - struct line_smeta *smeta_buf; - int found_lines = 0, recovered_lines = 0, open_lines = 0; - int is_next = 0; - int meta_line; - int i, valid_uuid = 0; - LIST_HEAD(recov_list); - - /* TODO: Implement FTL snapshot */ - - /* Scan recovery - takes place when FTL snapshot fails */ - spin_lock(&l_mg->free_lock); - meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); - set_bit(meta_line, &l_mg->meta_bitmap); - smeta = l_mg->sline_meta[meta_line]; - emeta = l_mg->eline_meta[meta_line]; - smeta_buf = (struct line_smeta *)smeta; - spin_unlock(&l_mg->free_lock); - - /* Order data lines using their sequence number */ - for (i = 0; i < l_mg->nr_lines; i++) { - u32 crc; - - line = &pblk->lines[i]; - - memset(smeta, 0, lm->smeta_len); - line->smeta = smeta; - line->lun_bitmap = ((void *)(smeta_buf)) + - sizeof(struct line_smeta); - - if (!pblk_line_was_written(line, pblk)) - continue; - - /* Lines that cannot be read are assumed as not written here */ - if (pblk_line_smeta_read(pblk, line)) - continue; - - crc = pblk_calc_smeta_crc(pblk, smeta_buf); - if (le32_to_cpu(smeta_buf->crc) != crc) - continue; - - if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) - continue; - - if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) { - pblk_err(pblk, "found incompatible line version %u\n", - smeta_buf->header.version_major); - return ERR_PTR(-EINVAL); - } - - /* The first valid instance uuid is used for initialization */ - if (!valid_uuid) { - import_guid(&pblk->instance_uuid, smeta_buf->header.uuid); - valid_uuid = 1; - } - - if (!guid_equal(&pblk->instance_uuid, - (guid_t *)&smeta_buf->header.uuid)) { - pblk_debug(pblk, "ignore line %u due to uuid mismatch\n", - i); - continue; - } - - /* Update line metadata */ - spin_lock(&line->lock); - line->id = le32_to_cpu(smeta_buf->header.id); - line->type = le16_to_cpu(smeta_buf->header.type); - line->seq_nr = le64_to_cpu(smeta_buf->seq_nr); - spin_unlock(&line->lock); - - /* Update general metadata */ - spin_lock(&l_mg->free_lock); - if (line->seq_nr >= l_mg->d_seq_nr) - l_mg->d_seq_nr = line->seq_nr + 1; - l_mg->nr_free_lines--; - spin_unlock(&l_mg->free_lock); - - if (pblk_line_recov_alloc(pblk, line)) - goto out; - - pblk_recov_line_add_ordered(&recov_list, line); - found_lines++; - pblk_debug(pblk, "recovering data line %d, seq:%llu\n", - line->id, smeta_buf->seq_nr); - } - - if (!found_lines) { - guid_gen(&pblk->instance_uuid); - - spin_lock(&l_mg->free_lock); - WARN_ON_ONCE(!test_and_clear_bit(meta_line, - &l_mg->meta_bitmap)); - spin_unlock(&l_mg->free_lock); - - goto out; - } - - /* Verify closed blocks and recover this portion of L2P table*/ - 
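Replay order matters here: pblk_recov_line_add_ordered() above keeps lines sorted by seq_nr so that, when the L2P table is rebuilt below, a later line's mapping overrides an earlier one. A standalone sketch of the ordered insert (plain singly-linked list instead of the kernel's list_head):

    #include <assert.h>
    #include <stddef.h>

    /* sketch of seq_nr-ordered insertion, not driver code */
    struct line { unsigned long long seq_nr; struct line *next; };

    static void add_ordered(struct line **head, struct line *line)
    {
            struct line **p = head;

            while (*p && (*p)->seq_nr <= line->seq_nr)
                    p = &(*p)->next; /* stop before the first larger seq */
            line->next = *p;
            *p = line;
    }

    int main(void)
    {
            struct line a = {5}, b = {2}, c = {9}, *head = NULL;

            add_ordered(&head, &a);
            add_ordered(&head, &b);
            add_ordered(&head, &c);
            assert(head == &b && b.next == &a && a.next == &c);
            return 0;
    }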
list_for_each_entry_safe(line, tline, &recov_list, list) { - recovered_lines++; - - line->emeta_ssec = pblk_line_emeta_start(pblk, line); - line->emeta = emeta; - memset(line->emeta->buf, 0, lm->emeta_len[0]); - - if (pblk_line_is_open(pblk, line)) { - pblk_recov_l2p_from_oob(pblk, line); - goto next; - } - - if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) { - pblk_recov_l2p_from_oob(pblk, line); - goto next; - } - - if (pblk_recov_check_emeta(pblk, line->emeta->buf)) { - pblk_recov_l2p_from_oob(pblk, line); - goto next; - } - - if (pblk_recov_check_line_version(pblk, line->emeta->buf)) - return ERR_PTR(-EINVAL); - - pblk_recov_wa_counters(pblk, line->emeta->buf); - - if (pblk_recov_l2p_from_emeta(pblk, line)) - pblk_recov_l2p_from_oob(pblk, line); - -next: - if (pblk_line_is_full(line)) { - struct list_head *move_list; - - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_CLOSED; - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - - spin_lock(&l_mg->gc_lock); - list_move_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); - - mempool_free(line->map_bitmap, l_mg->bitmap_pool); - line->map_bitmap = NULL; - line->smeta = NULL; - line->emeta = NULL; - } else { - spin_lock(&line->lock); - line->state = PBLK_LINESTATE_OPEN; - spin_unlock(&line->lock); - - line->emeta->mem = 0; - atomic_set(&line->emeta->sync, 0); - - trace_pblk_line_state(pblk_disk_name(pblk), line->id, - line->state); - - data_line = line; - line->meta_line = meta_line; - - open_lines++; - } - } - - if (!open_lines) { - spin_lock(&l_mg->free_lock); - WARN_ON_ONCE(!test_and_clear_bit(meta_line, - &l_mg->meta_bitmap)); - spin_unlock(&l_mg->free_lock); - } else { - spin_lock(&l_mg->free_lock); - l_mg->data_line = data_line; - /* Allocate next line for preparation */ - l_mg->data_next = pblk_line_get(pblk); - if (l_mg->data_next) { - l_mg->data_next->seq_nr = l_mg->d_seq_nr++; - l_mg->data_next->type = PBLK_LINETYPE_DATA; - is_next = 1; - } - spin_unlock(&l_mg->free_lock); - } - - if (is_next) - pblk_line_erase(pblk, l_mg->data_next); - -out: - if (found_lines != recovered_lines) - pblk_err(pblk, "failed to recover all found lines %d/%d\n", - found_lines, recovered_lines); - - return data_line; -} - -/* - * Pad current line - */ -int pblk_recov_pad(struct pblk *pblk) -{ - struct pblk_line *line; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int left_msecs; - int ret = 0; - - spin_lock(&l_mg->free_lock); - line = l_mg->data_line; - left_msecs = line->left_msecs; - spin_unlock(&l_mg->free_lock); - - ret = pblk_recov_pad_line(pblk, line, left_msecs); - if (ret) { - pblk_err(pblk, "tear down padding failed (%d)\n", ret); - return ret; - } - - pblk_line_close_meta(pblk, line); - return ret; -} diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c deleted file mode 100644 index a5f8bc2defbc..000000000000 --- a/drivers/lightnvm/pblk-rl.c +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * pblk-rl.c - pblk's rate limiter for user I/O - * - */ - -#include "pblk.h" - -static void pblk_rl_kick_u_timer(struct pblk_rl *rl) -{ - mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); -} - -int pblk_rl_is_limit(struct pblk_rl *rl) -{ - int rb_space; - - rb_space = atomic_read(&rl->rb_space); - - return (rb_space == 0); -} - -int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) -{ - int rb_user_cnt = atomic_read(&rl->rb_user_cnt); - int rb_space = atomic_read(&rl->rb_space); - - if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0)) - return NVM_IO_ERR; - - if (rb_user_cnt >= rl->rb_user_max) - return NVM_IO_REQUEUE; - - return NVM_IO_OK; -} - -void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries) -{ - int rb_space = atomic_read(&rl->rb_space); - - if (unlikely(rb_space >= 0)) - atomic_sub(nr_entries, &rl->rb_space); -} - -int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) -{ - int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt); - int rb_user_active; - - /* If there is no user I/O let GC take over space on the write buffer */ - rb_user_active = READ_ONCE(rl->rb_user_active); - return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active)); -} - -void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) -{ - atomic_add(nr_entries, &rl->rb_user_cnt); - - /* Release user I/O state. Protect from GC */ - smp_store_release(&rl->rb_user_active, 1); - pblk_rl_kick_u_timer(rl); -} - -void pblk_rl_werr_line_in(struct pblk_rl *rl) -{ - atomic_inc(&rl->werr_lines); -} - -void pblk_rl_werr_line_out(struct pblk_rl *rl) -{ - atomic_dec(&rl->werr_lines); -} - -void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries) -{ - atomic_add(nr_entries, &rl->rb_gc_cnt); -} - -void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc) -{ - atomic_sub(nr_user, &rl->rb_user_cnt); - atomic_sub(nr_gc, &rl->rb_gc_cnt); -} - -unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) -{ - return atomic_read(&rl->free_blocks); -} - -unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl) -{ - return atomic_read(&rl->free_user_blocks); -} - -static void __pblk_rl_update_rates(struct pblk_rl *rl, - unsigned long free_blocks) -{ - struct pblk *pblk = container_of(rl, struct pblk, rl); - int max = rl->rb_budget; - int werr_gc_needed = atomic_read(&rl->werr_lines); - - if (free_blocks >= rl->high) { - if (werr_gc_needed) { - /* Allocate a small budget for recovering - * lines with write errors - */ - rl->rb_gc_max = 1 << rl->rb_windows_pw; - rl->rb_user_max = max - rl->rb_gc_max; - rl->rb_state = PBLK_RL_WERR; - } else { - rl->rb_user_max = max; - rl->rb_gc_max = 0; - rl->rb_state = PBLK_RL_OFF; - } - } else if (free_blocks < rl->high) { - int shift = rl->high_pw - rl->rb_windows_pw; - int user_windows = free_blocks >> shift; - int user_max = user_windows << ilog2(NVM_MAX_VLBA); - - rl->rb_user_max = user_max; - rl->rb_gc_max = max - user_max; - - if (free_blocks <= rl->rsv_blocks) { - rl->rb_user_max = 0; - rl->rb_gc_max = max; - } - - /* In the worst case, we will need to GC lines in the low list - * (high valid sector count). 
If there are lines to GC on high - * or mid lists, these will be prioritized - */ - rl->rb_state = PBLK_RL_LOW; - } - - if (rl->rb_state != PBLK_RL_OFF) - pblk_gc_should_start(pblk); - else - pblk_gc_should_stop(pblk); -} - -void pblk_rl_update_rates(struct pblk_rl *rl) -{ - __pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl)); -} - -void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) -{ - int blk_in_line = atomic_read(&line->blk_in_line); - int free_blocks; - - atomic_add(blk_in_line, &rl->free_blocks); - free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks); - - __pblk_rl_update_rates(rl, free_blocks); -} - -void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, - bool used) -{ - int blk_in_line = atomic_read(&line->blk_in_line); - int free_blocks; - - atomic_sub(blk_in_line, &rl->free_blocks); - - if (used) - free_blocks = atomic_sub_return(blk_in_line, - &rl->free_user_blocks); - else - free_blocks = atomic_read(&rl->free_user_blocks); - - __pblk_rl_update_rates(rl, free_blocks); -} - -int pblk_rl_high_thrs(struct pblk_rl *rl) -{ - return rl->high; -} - -int pblk_rl_max_io(struct pblk_rl *rl) -{ - return rl->rb_max_io; -} - -static void pblk_rl_u_timer(struct timer_list *t) -{ - struct pblk_rl *rl = from_timer(rl, t, u_timer); - - /* Release user I/O state. Protect from GC */ - smp_store_release(&rl->rb_user_active, 0); -} - -void pblk_rl_free(struct pblk_rl *rl) -{ - del_timer(&rl->u_timer); -} - -void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold) -{ - struct pblk *pblk = container_of(rl, struct pblk, rl); - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - int sec_meta, blk_meta; - unsigned int rb_windows; - - /* Consider sectors used for metadata */ - sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; - blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - - rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; - rl->high_pw = get_count_order(rl->high); - - rl->rsv_blocks = pblk_get_min_chks(pblk); - - /* This will always be a power-of-2 */ - rb_windows = budget / NVM_MAX_VLBA; - rl->rb_windows_pw = get_count_order(rb_windows); - - /* To start with, all buffer is available to user I/O writers */ - rl->rb_budget = budget; - rl->rb_user_max = budget; - rl->rb_gc_max = 0; - rl->rb_state = PBLK_RL_HIGH; - - /* Maximize I/O size and ansure that back threshold is respected */ - if (threshold) - rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold; - else - rl->rb_max_io = budget - pblk->min_write_pgs_data - 1; - - atomic_set(&rl->rb_user_cnt, 0); - atomic_set(&rl->rb_gc_cnt, 0); - atomic_set(&rl->rb_space, -1); - atomic_set(&rl->werr_lines, 0); - - timer_setup(&rl->u_timer, pblk_rl_u_timer, 0); - - rl->rb_user_active = 0; - rl->rb_gc_active = 0; -} diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c deleted file mode 100644 index 6387302b03f2..000000000000 --- a/drivers/lightnvm/pblk-sysfs.c +++ /dev/null @@ -1,728 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Implementation of a physical block-device target for Open-channel SSDs. - * - * pblk-sysfs.c - pblk's sysfs - * - */ - -#include "pblk.h" - -static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - ssize_t sz = 0; - int i; - - for (i = 0; i < geo->all_luns; i++) { - int active = 1; - - rlun = &pblk->luns[i]; - if (!down_trylock(&rlun->wr_sem)) { - active = 0; - up(&rlun->wr_sem); - } - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "pblk: pos:%d, ch:%d, lun:%d - %d\n", - i, - rlun->bppa.a.ch, - rlun->bppa.a.lun, - active); - } - - return sz; -} - -static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) -{ - int free_blocks, free_user_blocks, total_blocks; - int rb_user_max, rb_user_cnt; - int rb_gc_max, rb_gc_cnt, rb_budget, rb_state; - - free_blocks = pblk_rl_nr_free_blks(&pblk->rl); - free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl); - rb_user_max = pblk->rl.rb_user_max; - rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); - rb_gc_max = pblk->rl.rb_gc_max; - rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt); - rb_budget = pblk->rl.rb_budget; - rb_state = pblk->rl.rb_state; - - total_blocks = pblk->rl.total_blocks; - - return snprintf(page, PAGE_SIZE, - "u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n", - rb_user_cnt, - rb_user_max, - rb_gc_cnt, - rb_gc_max, - rb_state, - rb_budget, - pblk->rl.high, - free_blocks, - free_user_blocks, - total_blocks, - READ_ONCE(pblk->rl.rb_user_active)); -} - -static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page) -{ - int gc_enabled, gc_active; - - pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active); - return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n", - gc_enabled, gc_active); -} - -static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page) -{ - ssize_t sz; - - sz = snprintf(page, PAGE_SIZE, - "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n", - atomic_long_read(&pblk->read_failed), - atomic_long_read(&pblk->read_high_ecc), - atomic_long_read(&pblk->read_empty), - atomic_long_read(&pblk->read_failed_gc), - atomic_long_read(&pblk->write_failed), - atomic_long_read(&pblk->erase_failed)); - - return sz; -} - -static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page) -{ - return pblk_rb_sysfs(&pblk->rwb, page); -} - -static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - ssize_t sz = 0; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; - struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf; - - sz = scnprintf(page, PAGE_SIZE, - "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", - pblk->addrf_len, - ppaf->blk_offset, ppaf->blk_len, - ppaf->pg_offset, ppaf->pg_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->ch_offset, ppaf->ch_len, - ppaf->pln_offset, ppaf->pln_len, - ppaf->sec_offset, ppaf->sec_len); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", - gppaf->blk_offset, gppaf->blk_len, - gppaf->pg_offset, gppaf->pg_len, 
- gppaf->lun_offset, gppaf->lun_len, - gppaf->ch_offset, gppaf->ch_len, - gppaf->pln_offset, gppaf->pln_len, - gppaf->sec_offset, gppaf->sec_len); - } else { - struct nvm_addrf *ppaf = &pblk->addrf; - struct nvm_addrf *gppaf = &geo->addrf; - - sz = scnprintf(page, PAGE_SIZE, - "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n", - pblk->addrf_len, - ppaf->ch_offset, ppaf->ch_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->chk_offset, ppaf->chk_len, - ppaf->sec_offset, ppaf->sec_len); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n", - gppaf->ch_offset, gppaf->ch_len, - gppaf->lun_offset, gppaf->lun_len, - gppaf->chk_offset, gppaf->chk_len, - gppaf->sec_offset, gppaf->sec_len); - } - - return sz; -} - -static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *line; - ssize_t sz = 0; - int nr_free_lines; - int cur_data, cur_log; - int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0; - int d_line_cnt = 0, l_line_cnt = 0; - int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; - int gc_werr = 0; - - int bad = 0, cor = 0; - int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; - int map_weight = 0, meta_weight = 0; - - spin_lock(&l_mg->free_lock); - cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1; - cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1; - nr_free_lines = l_mg->nr_free_lines; - - list_for_each_entry(line, &l_mg->free_list, list) - free_line_cnt++; - spin_unlock(&l_mg->free_lock); - - spin_lock(&l_mg->close_lock); - list_for_each_entry(line, &l_mg->emeta_list, list) - emeta_line_cnt++; - spin_unlock(&l_mg->close_lock); - - spin_lock(&l_mg->gc_lock); - list_for_each_entry(line, &l_mg->gc_full_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_full++; - } - - list_for_each_entry(line, &l_mg->gc_high_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_high++; - } - - list_for_each_entry(line, &l_mg->gc_mid_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_mid++; - } - - list_for_each_entry(line, &l_mg->gc_low_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_low++; - } - - list_for_each_entry(line, &l_mg->gc_empty_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_empty++; - } - - list_for_each_entry(line, &l_mg->gc_werr_list, list) { - if (line->type == PBLK_LINETYPE_DATA) - d_line_cnt++; - else if (line->type == PBLK_LINETYPE_LOG) - l_line_cnt++; - closed_line_cnt++; - gc_werr++; - } - - list_for_each_entry(line, &l_mg->bad_list, list) - bad++; - list_for_each_entry(line, &l_mg->corrupt_list, list) - cor++; - spin_unlock(&l_mg->gc_lock); - - spin_lock(&l_mg->free_lock); - if (l_mg->data_line) { - cur_sec = l_mg->data_line->cur_sec; - msecs = l_mg->data_line->left_msecs; - vsc = le32_to_cpu(*l_mg->data_line->vsc); - sec_in_line = l_mg->data_line->sec_in_line; - meta_weight = 
bitmap_weight(&l_mg->meta_bitmap, - PBLK_DATA_LINES); - - spin_lock(&l_mg->data_line->lock); - if (l_mg->data_line->map_bitmap) - map_weight = bitmap_weight(l_mg->data_line->map_bitmap, - lm->sec_per_line); - else - map_weight = 0; - spin_unlock(&l_mg->data_line->lock); - } - spin_unlock(&l_mg->free_lock); - - if (nr_free_lines != free_line_cnt) - pblk_err(pblk, "corrupted free line list:%d/%d\n", - nr_free_lines, free_line_cnt); - - sz = scnprintf(page, PAGE_SIZE - sz, - "line: nluns:%d, nblks:%d, nsecs:%d\n", - geo->all_luns, lm->blk_per_line, lm->sec_per_line); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", - cur_data, cur_log, - nr_free_lines, - emeta_line_cnt, meta_weight, - closed_line_cnt, - bad, cor, - d_line_cnt, l_line_cnt, - l_mg->nr_lines); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n", - gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr, - atomic_read(&pblk->gc.read_inflight_gc)); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", - cur_data, cur_sec, msecs, vsc, sec_in_line, - map_weight, lm->sec_per_line, - atomic_read(&pblk->inflight_io)); - - return sz; -} - -static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_meta *lm = &pblk->lm; - ssize_t sz = 0; - - sz = scnprintf(page, PAGE_SIZE - sz, - "smeta - len:%d, secs:%d\n", - lm->smeta_len, lm->smeta_sec); - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "emeta - len:%d, sec:%d, bb_start:%d\n", - lm->emeta_len[0], lm->emeta_sec[0], - lm->emeta_bb); - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "bitmap lengths: sec:%d, blk:%d, lun:%d\n", - lm->sec_bitmap_len, - lm->blk_bitmap_len, - lm->lun_bitmap_len); - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "blk_line:%d, sec_line:%d, sec_blk:%d\n", - lm->blk_per_line, - lm->sec_per_line, - geo->clba); - - return sz; -} - -static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page) -{ - return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write); -} - -static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, - char *page) -{ - int sz; - - sz = scnprintf(page, PAGE_SIZE, - "user:%lld gc:%lld pad:%lld WA:", - user, gc, pad); - - if (!user) { - sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); - } else { - u64 wa_int; - u32 wa_frac; - - wa_int = (user + gc + pad) * 100000; - wa_int = div64_u64(wa_int, user); - wa_int = div_u64_rem(wa_int, 100000, &wa_frac); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", - wa_int, wa_frac); - } - - return sz; -} - -static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page) -{ - return pblk_get_write_amp(atomic64_read(&pblk->user_wa), - atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa), - page); -} - -static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page) -{ - return pblk_get_write_amp( - atomic64_read(&pblk->user_wa) - pblk->user_rst_wa, - atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa, - atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page); -} - -static long long bucket_percentage(unsigned long long bucket, - unsigned long long total) -{ - int p = bucket * 100; - - p = div_u64(p, total); - - return p; -} - -static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) -{ - int sz = 0; - unsigned long long total; - unsigned long long total_buckets = 
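To make the fixed-point arithmetic in pblk_get_write_amp() above concrete, here are the same steps on illustrative counter values (not measurements):

	/* user = 1000, gc = 100, pad = 10: WA = 1110/1000 = 1.11 */
	u64 wa_int = (1000 + 100 + 10) * 100000ULL;	/* 111000000 */
	u32 wa_frac;

	wa_int = div64_u64(wa_int, 1000);		/* 111000 */
	wa_int = div_u64_rem(wa_int, 100000, &wa_frac);	/* 1, frac = 11000 */
	/* the "%llu.%05u" format prints this as WA:1.11000 */

Scaling by 100000 before dividing is what buys the five fractional digits without floating point.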
0; - int buckets = pblk->min_write_pgs - 1; - int i; - - total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst; - if (!total) { - for (i = 0; i < (buckets + 1); i++) - sz += scnprintf(page + sz, PAGE_SIZE - sz, - "%d:0 ", i); - sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); - - return sz; - } - - for (i = 0; i < buckets; i++) - total_buckets += atomic64_read(&pblk->pad_dist[i]); - - sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", - bucket_percentage(total - total_buckets, total)); - - for (i = 0; i < buckets; i++) { - unsigned long long p; - - p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]), - total); - sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", - i + 1, p); - } - sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n"); - - return sz; -} - -#ifdef CONFIG_NVM_PBLK_DEBUG -static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) -{ - return snprintf(page, PAGE_SIZE, - "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", - atomic_long_read(&pblk->inflight_writes), - atomic_long_read(&pblk->inflight_reads), - atomic_long_read(&pblk->req_writes), - (u64)atomic64_read(&pblk->nr_flush), - atomic_long_read(&pblk->padded_writes), - atomic_long_read(&pblk->padded_wb), - atomic_long_read(&pblk->sub_writes), - atomic_long_read(&pblk->sync_writes), - atomic_long_read(&pblk->recov_writes), - atomic_long_read(&pblk->recov_gc_writes), - atomic_long_read(&pblk->recov_gc_reads), - atomic_long_read(&pblk->cache_reads), - atomic_long_read(&pblk->sync_reads)); -} -#endif - -static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page, - size_t len) -{ - size_t c_len; - int force; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &force)) - return -EINVAL; - - pblk_gc_sysfs_force(pblk, force); - - return len; -} - -static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk, - const char *page, size_t len) -{ - size_t c_len; - int sec_per_write; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &sec_per_write)) - return -EINVAL; - - if (!pblk_is_oob_meta_supported(pblk)) { - /* For packed metadata case it is - * not allowed to change sec_per_write. 
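The sysfs store handlers in this file all validate their input the same way before acting; the shared pattern, condensed:

	size_t c_len = strcspn(page, "\n");	/* length up to the newline */
	unsigned int val;

	if (c_len >= len)		/* require a newline-terminated payload */
		return -EINVAL;
	if (kstrtouint(page, 0, &val))	/* reject non-numeric input */
		return -EINVAL;
	/* handler-specific range checks follow; success returns len */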
- */ - return -EINVAL; - } - - if (sec_per_write < pblk->min_write_pgs - || sec_per_write > pblk->max_write_pgs - || sec_per_write % pblk->min_write_pgs != 0) - return -EINVAL; - - pblk_set_sec_per_write(pblk, sec_per_write); - - return len; -} - -static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk, - const char *page, size_t len) -{ - size_t c_len; - int reset_value; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &reset_value)) - return -EINVAL; - - if (reset_value != 0) - return -EINVAL; - - pblk->user_rst_wa = atomic64_read(&pblk->user_wa); - pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa); - pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa); - - return len; -} - - -static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk, - const char *page, size_t len) -{ - size_t c_len; - int reset_value; - int buckets = pblk->min_write_pgs - 1; - int i; - - c_len = strcspn(page, "\n"); - if (c_len >= len) - return -EINVAL; - - if (kstrtouint(page, 0, &reset_value)) - return -EINVAL; - - if (reset_value != 0) - return -EINVAL; - - for (i = 0; i < buckets; i++) - atomic64_set(&pblk->pad_dist[i], 0); - - pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush); - - return len; -} - -static struct attribute sys_write_luns = { - .name = "write_luns", - .mode = 0444, -}; - -static struct attribute sys_rate_limiter_attr = { - .name = "rate_limiter", - .mode = 0444, -}; - -static struct attribute sys_gc_state = { - .name = "gc_state", - .mode = 0444, -}; - -static struct attribute sys_errors_attr = { - .name = "errors", - .mode = 0444, -}; - -static struct attribute sys_rb_attr = { - .name = "write_buffer", - .mode = 0444, -}; - -static struct attribute sys_stats_ppaf_attr = { - .name = "ppa_format", - .mode = 0444, -}; - -static struct attribute sys_lines_attr = { - .name = "lines", - .mode = 0444, -}; - -static struct attribute sys_lines_info_attr = { - .name = "lines_info", - .mode = 0444, -}; - -static struct attribute sys_gc_force = { - .name = "gc_force", - .mode = 0200, -}; - -static struct attribute sys_max_sec_per_write = { - .name = "max_sec_per_write", - .mode = 0644, -}; - -static struct attribute sys_write_amp_mileage = { - .name = "write_amp_mileage", - .mode = 0444, -}; - -static struct attribute sys_write_amp_trip = { - .name = "write_amp_trip", - .mode = 0644, -}; - -static struct attribute sys_padding_dist = { - .name = "padding_dist", - .mode = 0644, -}; - -#ifdef CONFIG_NVM_PBLK_DEBUG -static struct attribute sys_stats_debug_attr = { - .name = "stats", - .mode = 0444, -}; -#endif - -static struct attribute *pblk_attrs[] = { - &sys_write_luns, - &sys_rate_limiter_attr, - &sys_errors_attr, - &sys_gc_state, - &sys_gc_force, - &sys_max_sec_per_write, - &sys_rb_attr, - &sys_stats_ppaf_attr, - &sys_lines_attr, - &sys_lines_info_attr, - &sys_write_amp_mileage, - &sys_write_amp_trip, - &sys_padding_dist, -#ifdef CONFIG_NVM_PBLK_DEBUG - &sys_stats_debug_attr, -#endif - NULL, -}; - -static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct pblk *pblk = container_of(kobj, struct pblk, kobj); - - if (strcmp(attr->name, "rate_limiter") == 0) - return pblk_sysfs_rate_limiter(pblk, buf); - else if (strcmp(attr->name, "write_luns") == 0) - return pblk_sysfs_luns_show(pblk, buf); - else if (strcmp(attr->name, "gc_state") == 0) - return pblk_sysfs_gc_state_show(pblk, buf); - else if (strcmp(attr->name, "errors") == 0) - return pblk_sysfs_stats(pblk, buf); - else if (strcmp(attr->name, "write_buffer") == 
0) - return pblk_sysfs_write_buffer(pblk, buf); - else if (strcmp(attr->name, "ppa_format") == 0) - return pblk_sysfs_ppaf(pblk, buf); - else if (strcmp(attr->name, "lines") == 0) - return pblk_sysfs_lines(pblk, buf); - else if (strcmp(attr->name, "lines_info") == 0) - return pblk_sysfs_lines_info(pblk, buf); - else if (strcmp(attr->name, "max_sec_per_write") == 0) - return pblk_sysfs_get_sec_per_write(pblk, buf); - else if (strcmp(attr->name, "write_amp_mileage") == 0) - return pblk_sysfs_get_write_amp_mileage(pblk, buf); - else if (strcmp(attr->name, "write_amp_trip") == 0) - return pblk_sysfs_get_write_amp_trip(pblk, buf); - else if (strcmp(attr->name, "padding_dist") == 0) - return pblk_sysfs_get_padding_dist(pblk, buf); -#ifdef CONFIG_NVM_PBLK_DEBUG - else if (strcmp(attr->name, "stats") == 0) - return pblk_sysfs_stats_debug(pblk, buf); -#endif - return 0; -} - -static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct pblk *pblk = container_of(kobj, struct pblk, kobj); - - if (strcmp(attr->name, "gc_force") == 0) - return pblk_sysfs_gc_force(pblk, buf, len); - else if (strcmp(attr->name, "max_sec_per_write") == 0) - return pblk_sysfs_set_sec_per_write(pblk, buf, len); - else if (strcmp(attr->name, "write_amp_trip") == 0) - return pblk_sysfs_set_write_amp_trip(pblk, buf, len); - else if (strcmp(attr->name, "padding_dist") == 0) - return pblk_sysfs_set_padding_dist(pblk, buf, len); - return 0; -} - -static const struct sysfs_ops pblk_sysfs_ops = { - .show = pblk_sysfs_show, - .store = pblk_sysfs_store, -}; - -static struct kobj_type pblk_ktype = { - .sysfs_ops = &pblk_sysfs_ops, - .default_attrs = pblk_attrs, -}; - -int pblk_sysfs_init(struct gendisk *tdisk) -{ - struct pblk *pblk = tdisk->private_data; - struct device *parent_dev = disk_to_dev(pblk->disk); - int ret; - - ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype, - kobject_get(&parent_dev->kobj), - "%s", "pblk"); - if (ret) { - pblk_err(pblk, "could not register\n"); - return ret; - } - - kobject_uevent(&pblk->kobj, KOBJ_ADD); - return 0; -} - -void pblk_sysfs_exit(struct gendisk *tdisk) -{ - struct pblk *pblk = tdisk->private_data; - - kobject_uevent(&pblk->kobj, KOBJ_REMOVE); - kobject_del(&pblk->kobj); - kobject_put(&pblk->kobj); -} diff --git a/drivers/lightnvm/pblk-trace.h b/drivers/lightnvm/pblk-trace.h deleted file mode 100644 index 47b67c6bff7a..000000000000 --- a/drivers/lightnvm/pblk-trace.h +++ /dev/null @@ -1,145 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM pblk - -#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PBLK_H - -#include <linux/tracepoint.h> - -struct ppa_addr; - -#define show_chunk_flags(state) __print_flags(state, "", \ - { NVM_CHK_ST_FREE, "FREE", }, \ - { NVM_CHK_ST_CLOSED, "CLOSED", }, \ - { NVM_CHK_ST_OPEN, "OPEN", }, \ - { NVM_CHK_ST_OFFLINE, "OFFLINE", }) - -#define show_line_state(state) __print_symbolic(state, \ - { PBLK_LINESTATE_NEW, "NEW", }, \ - { PBLK_LINESTATE_FREE, "FREE", }, \ - { PBLK_LINESTATE_OPEN, "OPEN", }, \ - { PBLK_LINESTATE_CLOSED, "CLOSED", }, \ - { PBLK_LINESTATE_GC, "GC", }, \ - { PBLK_LINESTATE_BAD, "BAD", }, \ - { PBLK_LINESTATE_CORRUPT, "CORRUPT" }) - - -#define show_pblk_state(state) __print_symbolic(state, \ - { PBLK_STATE_RUNNING, "RUNNING", }, \ - { PBLK_STATE_STOPPING, "STOPPING", }, \ - { PBLK_STATE_RECOVERING, "RECOVERING", }, \ - { PBLK_STATE_STOPPED, "STOPPED" }) - -#define show_chunk_erase_state(state) 
__print_symbolic(state, \ - { PBLK_CHUNK_RESET_START, "START", }, \ - { PBLK_CHUNK_RESET_DONE, "OK", }, \ - { PBLK_CHUNK_RESET_FAILED, "FAILED" }) - - -TRACE_EVENT(pblk_chunk_reset, - - TP_PROTO(const char *name, struct ppa_addr *ppa, int state), - - TP_ARGS(name, ppa, state), - - TP_STRUCT__entry( - __string(name, name) - __field(u64, ppa) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(name, name); - __entry->ppa = ppa->ppa; - __entry->state = state; - ), - - TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), - (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), - (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), - (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), - show_chunk_erase_state((int)__entry->state)) - -); - -TRACE_EVENT(pblk_chunk_state, - - TP_PROTO(const char *name, struct ppa_addr *ppa, int state), - - TP_ARGS(name, ppa, state), - - TP_STRUCT__entry( - __string(name, name) - __field(u64, ppa) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(name, name); - __entry->ppa = ppa->ppa; - __entry->state = state; - ), - - TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name), - (u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp), - (u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu), - (u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk), - show_chunk_flags((int)__entry->state)) - -); - -TRACE_EVENT(pblk_line_state, - - TP_PROTO(const char *name, int line, int state), - - TP_ARGS(name, line, state), - - TP_STRUCT__entry( - __string(name, name) - __field(int, line) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(name, name); - __entry->line = line; - __entry->state = state; - ), - - TP_printk("dev=%s line=%d state=%s", __get_str(name), - (int)__entry->line, - show_line_state((int)__entry->state)) - -); - -TRACE_EVENT(pblk_state, - - TP_PROTO(const char *name, int state), - - TP_ARGS(name, state), - - TP_STRUCT__entry( - __string(name, name) - __field(int, state) - ), - - TP_fast_assign( - __assign_str(name, name); - __entry->state = state; - ), - - TP_printk("dev=%s state=%s", __get_str(name), - show_pblk_state((int)__entry->state)) - -); - -#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */ - -/* This part must be outside protection */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../drivers/lightnvm -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE pblk-trace -#include <trace/define_trace.h> diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c deleted file mode 100644 index b9a2aeba95ab..000000000000 --- a/drivers/lightnvm/pblk-write.c +++ /dev/null @@ -1,665 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2016 CNEX Labs - * Initial release: Javier Gonzalez <javier@cnexlabs.com> - * Matias Bjorling <matias@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
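Each TRACE_EVENT above compiles into a trace_<name>() call matching its TP_PROTO, plus a trace_<name>_enabled() static-key test; a representative call site, assuming the pblk_disk_name() helper defined in pblk.h:

	if (trace_pblk_chunk_state_enabled())
		trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa,
				       chunk->state);

With TRACE_SYSTEM set to pblk, the events appear under events/pblk/ in tracefs.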
- * - * pblk-write.c - pblk's write path from write buffer to media - */ - -#include "pblk.h" -#include "pblk-trace.h" - -static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - struct bio *original_bio; - struct pblk_rb *rwb = &pblk->rwb; - unsigned long ret; - int i; - - for (i = 0; i < c_ctx->nr_valid; i++) { - struct pblk_w_ctx *w_ctx; - int pos = c_ctx->sentry + i; - int flags; - - w_ctx = pblk_rb_w_ctx(rwb, pos); - flags = READ_ONCE(w_ctx->flags); - - if (flags & PBLK_FLUSH_ENTRY) { - flags &= ~PBLK_FLUSH_ENTRY; - /* Release flags on context. Protect from writes */ - smp_store_release(&w_ctx->flags, flags); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_dec(&rwb->inflight_flush_point); -#endif - } - - while ((original_bio = bio_list_pop(&w_ctx->bios))) - bio_endio(original_bio); - } - - if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, - c_ctx->nr_padded); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(rqd->nr_ppas, &pblk->sync_writes); -#endif - - ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); - - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - - return ret; -} - -static unsigned long pblk_end_queued_w_bio(struct pblk *pblk, - struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - list_del(&c_ctx->list); - return pblk_end_w_bio(pblk, rqd, c_ctx); -} - -static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx) -{ - struct pblk_c_ctx *c, *r; - unsigned long flags; - unsigned long pos; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes); -#endif - pblk_up_rq(pblk, c_ctx->lun_bitmap); - - pos = pblk_rb_sync_init(&pblk->rwb, &flags); - if (pos == c_ctx->sentry) { - pos = pblk_end_w_bio(pblk, rqd, c_ctx); - -retry: - list_for_each_entry_safe(c, r, &pblk->compl_list, list) { - rqd = nvm_rq_from_c_ctx(c); - if (c->sentry == pos) { - pos = pblk_end_queued_w_bio(pblk, rqd, c); - goto retry; - } - } - } else { - WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd); - list_add_tail(&c_ctx->list, &pblk->compl_list); - } - pblk_rb_sync_end(&pblk->rwb, &flags); -} - -/* Map remaining sectors in chunk, starting from ppa */ -static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa, - int rqd_ppas) -{ - struct pblk_line *line; - struct ppa_addr map_ppa = *ppa; - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - __le64 *lba_list; - u64 paddr; - int done = 0; - int n = 0; - - line = pblk_ppa_to_line(pblk, *ppa); - lba_list = emeta_to_lbas(pblk, line->emeta->buf); - - spin_lock(&line->lock); - - while (!done) { - paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa); - - if (!test_and_set_bit(paddr, line->map_bitmap)) - line->left_msecs--; - - if (n < rqd_ppas && lba_list[paddr] != addr_empty) - line->nr_valid_lbas--; - - lba_list[paddr] = addr_empty; - - if (!test_and_set_bit(paddr, line->invalid_bitmap)) - le32_add_cpu(line->vsc, -1); - - done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); - - n++; - } - - line->w_err_gc->has_write_err = 1; - spin_unlock(&line->lock); -} - -static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, - unsigned int nr_entries) -{ - struct pblk_rb *rb = &pblk->rwb; - struct pblk_rb_entry *entry; - struct pblk_line *line; - struct pblk_w_ctx *w_ctx; - struct ppa_addr ppa_l2p; - int flags; - unsigned int i; - - spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_entries; i++) { - entry = &rb->entries[pblk_rb_ptr_wrap(rb, sentry, i)]; - w_ctx = &entry->w_ctx; - - /* Check if the lba has been 
overwritten */ - if (w_ctx->lba != ADDR_EMPTY) { - ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); - if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) - w_ctx->lba = ADDR_EMPTY; - } - - /* Mark up the entry as submittable again */ - flags = READ_ONCE(w_ctx->flags); - flags |= PBLK_WRITTEN_DATA; - /* Release flags on write context. Protect from writes */ - smp_store_release(&w_ctx->flags, flags); - - /* Decrease the reference count to the line as we will - * re-map these entries - */ - line = pblk_ppa_to_line(pblk, w_ctx->ppa); - atomic_dec(&line->sec_to_update); - kref_put(&line->ref, pblk_line_put); - } - spin_unlock(&pblk->trans_lock); -} - -static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx) -{ - struct pblk_c_ctx *r_ctx; - - r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL); - if (!r_ctx) - return; - - r_ctx->lun_bitmap = NULL; - r_ctx->sentry = c_ctx->sentry; - r_ctx->nr_valid = c_ctx->nr_valid; - r_ctx->nr_padded = c_ctx->nr_padded; - - spin_lock(&pblk->resubmit_lock); - list_add_tail(&r_ctx->list, &pblk->resubmit_list); - spin_unlock(&pblk->resubmit_lock); - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes); -#endif -} - -static void pblk_submit_rec(struct work_struct *work) -{ - struct pblk_rec_ctx *recovery = - container_of(work, struct pblk_rec_ctx, ws_rec); - struct pblk *pblk = recovery->pblk; - struct nvm_rq *rqd = recovery->rqd; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - pblk_log_write_err(pblk, rqd); - - pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas); - pblk_queue_resubmit(pblk, c_ctx); - - pblk_up_rq(pblk, c_ctx->lun_bitmap); - if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, - c_ctx->nr_padded); - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - mempool_free(recovery, &pblk->rec_pool); - - atomic_dec(&pblk->inflight_io); - pblk_write_kick(pblk); -} - - -static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct pblk_rec_ctx *recovery; - - recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC); - if (!recovery) { - pblk_err(pblk, "could not allocate recovery work\n"); - return; - } - - recovery->pblk = pblk; - recovery->rqd = rqd; - - INIT_WORK(&recovery->ws_rec, pblk_submit_rec); - queue_work(pblk->close_wq, &recovery->ws_rec); -} - -static void pblk_end_io_write(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - - if (rqd->error) { - pblk_end_w_fail(pblk, rqd); - return; - } else { - if (trace_pblk_chunk_state_enabled()) - pblk_check_chunk_state_update(pblk, rqd); -#ifdef CONFIG_NVM_PBLK_DEBUG - WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); -#endif - } - - pblk_complete_write(pblk, rqd, c_ctx); - atomic_dec(&pblk->inflight_io); -} - -static void pblk_end_io_write_meta(struct nvm_rq *rqd) -{ - struct pblk *pblk = rqd->private; - struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); - struct pblk_line *line = m_ctx->private; - struct pblk_emeta *emeta = line->emeta; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - int sync; - - pblk_up_chunk(pblk, ppa_list[0]); - - if (rqd->error) { - pblk_log_write_err(pblk, rqd); - pblk_err(pblk, "metadata I/O failed. 
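Failure handling in this file is spread over four functions; the control flow, condensed:

	/*
	 * pblk_end_io_write()             completion with rqd->error set
	 *   -> pblk_end_w_fail()          GFP_ATOMIC pblk_rec_ctx alloc
	 *     -> pblk_submit_rec()        worker queued on pblk->close_wq:
	 *          pblk_log_write_err()   account the failed write
	 *          pblk_map_remaining()   invalidate the rest of the chunk
	 *          pblk_queue_resubmit()  hand entries back to the writer
	 *          pblk_write_kick()      wake the write thread
	 */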
Line %d\n", line->id); - line->w_err_gc->has_write_err = 1; - } else { - if (trace_pblk_chunk_state_enabled()) - pblk_check_chunk_state_update(pblk, rqd); - } - - sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); - if (sync == emeta->nr_entries) - pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws, - GFP_ATOMIC, pblk->close_wq); - - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - - atomic_dec(&pblk->inflight_io); -} - -static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int nr_secs, nvm_end_io_fn(*end_io)) -{ - /* Setup write request */ - rqd->opcode = NVM_OP_PWRITE; - rqd->nr_ppas = nr_secs; - rqd->is_seq = 1; - rqd->private = pblk; - rqd->end_io = end_io; - - return pblk_alloc_rqd_meta(pblk, rqd); -} - -static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct ppa_addr *erase_ppa) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *e_line = pblk_line_get_erase(pblk); - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - unsigned int valid = c_ctx->nr_valid; - unsigned int padded = c_ctx->nr_padded; - unsigned int nr_secs = valid + padded; - unsigned long *lun_bitmap; - int ret; - - lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); - if (!lun_bitmap) - return -ENOMEM; - c_ctx->lun_bitmap = lun_bitmap; - - ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write); - if (ret) { - kfree(lun_bitmap); - return ret; - } - - if (likely(!e_line || !atomic_read(&e_line->left_eblks))) - ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, - valid, 0); - else - ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, - valid, erase_ppa); - - return ret; -} - -static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, - unsigned int secs_to_flush) -{ - int secs_to_sync; - - secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true); - -#ifdef CONFIG_NVM_PBLK_DEBUG - if ((!secs_to_sync && secs_to_flush) - || (secs_to_sync < 0) - || (secs_to_sync > secs_avail && !secs_to_flush)) { - pblk_err(pblk, "bad sector calculation (a:%d,s:%d,f:%d)\n", - secs_avail, secs_to_sync, secs_to_flush); - } -#endif - - return secs_to_sync; -} - -int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_emeta *emeta = meta_line->emeta; - struct ppa_addr *ppa_list; - struct pblk_g_ctx *m_ctx; - struct nvm_rq *rqd; - void *data; - u64 paddr; - int rq_ppas = pblk->min_write_pgs; - int id = meta_line->id; - int rq_len; - int i, j; - int ret; - - rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); - - m_ctx = nvm_rq_to_pdu(rqd); - m_ctx->private = meta_line; - - rq_len = rq_ppas * geo->csecs; - data = ((void *)emeta->buf) + emeta->mem; - - ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta); - if (ret) - goto fail_free_rqd; - - ppa_list = nvm_rq_to_ppa_list(rqd); - for (i = 0; i < rqd->nr_ppas; ) { - spin_lock(&meta_line->lock); - paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas); - spin_unlock(&meta_line->lock); - for (j = 0; j < rq_ppas; j++, i++, paddr++) - ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id); - } - - spin_lock(&l_mg->close_lock); - emeta->mem += rq_len; - if (emeta->mem >= lm->emeta_len[0]) - list_del(&meta_line->list); - spin_unlock(&l_mg->close_lock); - - pblk_down_chunk(pblk, ppa_list[0]); - - ret = pblk_submit_io(pblk, rqd, data); - if (ret) { - pblk_err(pblk, "emeta I/O submission failed: %d\n", ret); - goto 
fail_rollback; - } - - return NVM_IO_OK; - -fail_rollback: - pblk_up_chunk(pblk, ppa_list[0]); - spin_lock(&l_mg->close_lock); - pblk_dealloc_page(pblk, meta_line, rq_ppas); - list_add(&meta_line->list, &meta_line->list); - spin_unlock(&l_mg->close_lock); -fail_free_rqd: - pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); - return ret; -} - -static inline bool pblk_valid_meta_ppa(struct pblk *pblk, - struct pblk_line *meta_line, - struct nvm_rq *data_rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd); - struct pblk_line *data_line = pblk_line_get_data(pblk); - struct ppa_addr ppa, ppa_opt; - u64 paddr; - int pos_opt; - - /* Schedule a metadata I/O that is half the distance from the data I/O - * with regards to the number of LUNs forming the pblk instance. This - * balances LUN conflicts across every I/O. - * - * When the LUN configuration changes (e.g., due to GC), this distance - * can align, which would result on metadata and data I/Os colliding. In - * this case, modify the distance to not be optimal, but move the - * optimal in the right direction. - */ - paddr = pblk_lookup_page(pblk, meta_line); - ppa = addr_to_gen_ppa(pblk, paddr, 0); - ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0); - pos_opt = pblk_ppa_to_pos(geo, ppa_opt); - - if (test_bit(pos_opt, data_c_ctx->lun_bitmap) || - test_bit(pos_opt, data_line->blk_bitmap)) - return true; - - if (unlikely(pblk_ppa_comp(ppa_opt, ppa))) - data_line->meta_distance--; - - return false; -} - -static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, - struct nvm_rq *data_rqd) -{ - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *meta_line; - - spin_lock(&l_mg->close_lock); - if (list_empty(&l_mg->emeta_list)) { - spin_unlock(&l_mg->close_lock); - return NULL; - } - meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (meta_line->emeta->mem >= lm->emeta_len[0]) { - spin_unlock(&l_mg->close_lock); - return NULL; - } - spin_unlock(&l_mg->close_lock); - - if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) - return NULL; - - return meta_line; -} - -static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct ppa_addr erase_ppa; - struct pblk_line *meta_line; - int err; - - pblk_ppa_set_empty(&erase_ppa); - - /* Assign lbas to ppas and populate request structure */ - err = pblk_setup_w_rq(pblk, rqd, &erase_ppa); - if (err) { - pblk_err(pblk, "could not setup write request: %d\n", err); - return NVM_IO_ERR; - } - - meta_line = pblk_should_submit_meta_io(pblk, rqd); - - /* Submit data write for current data line */ - err = pblk_submit_io(pblk, rqd, NULL); - if (err) { - pblk_err(pblk, "data I/O submission failed: %d\n", err); - return NVM_IO_ERR; - } - - if (!pblk_ppa_empty(erase_ppa)) { - /* Submit erase for next data line */ - if (pblk_blk_erase_async(pblk, erase_ppa)) { - struct pblk_line *e_line = pblk_line_get_erase(pblk); - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - int bit; - - atomic_inc(&e_line->left_eblks); - bit = pblk_ppa_to_pos(geo, erase_ppa); - WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap)); - } - } - - if (meta_line) { - /* Submit metadata write for previous data line */ - err = pblk_submit_meta_io(pblk, meta_line); - if (err) { - pblk_err(pblk, "metadata I/O submission failed: %d", - err); - return NVM_IO_ERR; - } - } - - return NVM_IO_OK; -} - -static void 
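The placement heuristic in pblk_valid_meta_ppa() above is easier to follow with numbers; an illustrative configuration:

	/*
	 * Say the instance stripes over 8 LUNs and meta_distance = 4.
	 * With data being mapped at paddr P, metadata aims at P + 4,
	 * so the two in-flight I/Os contend for different LUNs. When
	 * the layout shifts (e.g. after GC) and ppa_opt ends up equal
	 * to ppa, meta_distance is decremented so the next metadata
	 * submission moves off the data stripe again.
	 */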
pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - struct bio *bio = rqd->bio; - - if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid, - c_ctx->nr_padded); -} - -static int pblk_submit_write(struct pblk *pblk, int *secs_left) -{ - struct bio *bio; - struct nvm_rq *rqd; - unsigned int secs_avail, secs_to_sync, secs_to_com; - unsigned int secs_to_flush, packed_meta_pgs; - unsigned long pos; - unsigned int resubmit; - - *secs_left = 0; - - spin_lock(&pblk->resubmit_lock); - resubmit = !list_empty(&pblk->resubmit_list); - spin_unlock(&pblk->resubmit_lock); - - /* Resubmit failed writes first */ - if (resubmit) { - struct pblk_c_ctx *r_ctx; - - spin_lock(&pblk->resubmit_lock); - r_ctx = list_first_entry(&pblk->resubmit_list, - struct pblk_c_ctx, list); - list_del(&r_ctx->list); - spin_unlock(&pblk->resubmit_lock); - - secs_avail = r_ctx->nr_valid; - pos = r_ctx->sentry; - - pblk_prepare_resubmit(pblk, pos, secs_avail); - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, - secs_avail); - - kfree(r_ctx); - } else { - /* If there are no sectors in the cache, - * flushes (bios without data) will be cleared on - * the cache threads - */ - secs_avail = pblk_rb_read_count(&pblk->rwb); - if (!secs_avail) - return 0; - - secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); - if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data) - return 0; - - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, - secs_to_flush); - if (secs_to_sync > pblk->max_write_pgs) { - pblk_err(pblk, "bad buffer sync calculation\n"); - return 0; - } - - secs_to_com = (secs_to_sync > secs_avail) ? - secs_avail : secs_to_sync; - pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); - } - - packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data); - bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs); - - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - - rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); - rqd->bio = bio; - - if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync, - secs_avail)) { - pblk_err(pblk, "corrupted write bio\n"); - goto fail_put_bio; - } - - if (pblk_submit_io_set(pblk, rqd)) - goto fail_free_bio; - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_long_add(secs_to_sync, &pblk->sub_writes); -#endif - - *secs_left = 1; - return 0; - -fail_free_bio: - pblk_free_write_rqd(pblk, rqd); -fail_put_bio: - bio_put(bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - - return -EINTR; -} - -int pblk_write_ts(void *data) -{ - struct pblk *pblk = data; - int secs_left; - int write_failure = 0; - - while (!kthread_should_stop()) { - if (!write_failure) { - write_failure = pblk_submit_write(pblk, &secs_left); - - if (secs_left) - continue; - } - set_current_state(TASK_INTERRUPTIBLE); - io_schedule(); - } - - return 0; -} diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h deleted file mode 100644 index 86ffa875bfe1..000000000000 --- a/drivers/lightnvm/pblk.h +++ /dev/null @@ -1,1358 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015 IT University of Copenhagen (rrpc.h) - * Copyright (C) 2016 CNEX Labs - * Initial release: Matias Bjorling <matias@cnexlabs.com> - * Write buffering: Javier Gonzalez <javier@cnexlabs.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. 
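A worked pass through the sizing logic in pblk_submit_write() above, on illustrative values with min_write_pgs = 8:

	/*
	 * secs_avail    = 21  entries sitting in the ring buffer
	 * secs_to_flush = 5   entries pinned behind a flush point
	 * secs_to_sync  = 24  assuming pblk_calc_secs() rounds up to
	 *                     a whole write unit when a flush is due
	 * secs_to_com   = 21  min(sync, avail): what gets committed;
	 *                     the 3-sector shortfall becomes padding
	 */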
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Implementation of a Physical Block-device target for Open-channel SSDs. - * - */ - -#ifndef PBLK_H_ -#define PBLK_H_ - -#include <linux/blkdev.h> -#include <linux/blk-mq.h> -#include <linux/bio.h> -#include <linux/module.h> -#include <linux/kthread.h> -#include <linux/vmalloc.h> -#include <linux/crc32.h> -#include <linux/uuid.h> - -#include <linux/lightnvm.h> - -/* Run only GC if less than 1/X blocks are free */ -#define GC_LIMIT_INVERSE 5 -#define GC_TIME_MSECS 1000 - -#define PBLK_SECTOR (512) -#define PBLK_EXPOSED_PAGE_SIZE (4096) - -#define PBLK_NR_CLOSE_JOBS (4) - -#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) - -/* Max 512 LUNs per device */ -#define PBLK_MAX_LUNS_BITMAP (4) - -#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR) - -/* Static pool sizes */ -#define PBLK_GEN_WS_POOL_SIZE (2) - -#define PBLK_DEFAULT_OP (11) - -enum { - PBLK_READ = READ, - PBLK_WRITE = WRITE,/* Write from write buffer */ - PBLK_WRITE_INT, /* Internal write - no write buffer */ - PBLK_READ_RECOV, /* Recovery read - errors allowed */ - PBLK_ERASE, -}; - -enum { - /* IO Types */ - PBLK_IOTYPE_USER = 1 << 0, - PBLK_IOTYPE_GC = 1 << 1, - - /* Write buffer flags */ - PBLK_FLUSH_ENTRY = 1 << 2, - PBLK_WRITTEN_DATA = 1 << 3, - PBLK_SUBMITTED_ENTRY = 1 << 4, - PBLK_WRITABLE_ENTRY = 1 << 5, -}; - -enum { - PBLK_BLK_ST_OPEN = 0x1, - PBLK_BLK_ST_CLOSED = 0x2, -}; - -enum { - PBLK_CHUNK_RESET_START, - PBLK_CHUNK_RESET_DONE, - PBLK_CHUNK_RESET_FAILED, -}; - -struct pblk_sec_meta { - u64 reserved; - __le64 lba; -}; - -/* The number of GC lists and the rate-limiter states go together. This way the - * rate-limiter can dictate how much GC is needed based on resource utilization. - */ -#define PBLK_GC_NR_LISTS 4 - -enum { - PBLK_RL_OFF = 0, - PBLK_RL_WERR = 1, - PBLK_RL_HIGH = 2, - PBLK_RL_MID = 3, - PBLK_RL_LOW = 4 -}; - -#define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA) - -/* write buffer completion context */ -struct pblk_c_ctx { - struct list_head list; /* Head for out-of-order completion */ - - unsigned long *lun_bitmap; /* Luns used on current request */ - unsigned int sentry; - unsigned int nr_valid; - unsigned int nr_padded; -}; - -/* read context */ -struct pblk_g_ctx { - void *private; - unsigned long start_time; - u64 lba; -}; - -/* Pad context */ -struct pblk_pad_rq { - struct pblk *pblk; - struct completion wait; - struct kref ref; -}; - -/* Recovery context */ -struct pblk_rec_ctx { - struct pblk *pblk; - struct nvm_rq *rqd; - struct work_struct ws_rec; -}; - -/* Write context */ -struct pblk_w_ctx { - struct bio_list bios; /* Original bios - used for completion - * in REQ_FUA, REQ_FLUSH case - */ - u64 lba; /* Logic addr. associated with entry */ - struct ppa_addr ppa; /* Physic addr. 
associated with entry */ - int flags; /* Write context flags */ -}; - -struct pblk_rb_entry { - struct ppa_addr cacheline; /* Cacheline for this entry */ - void *data; /* Pointer to data on this entry */ - struct pblk_w_ctx w_ctx; /* Context for this entry */ - struct list_head index; /* List head to enable indexes */ -}; - -#define EMPTY_ENTRY (~0U) - -struct pblk_rb_pages { - struct page *pages; - int order; - struct list_head list; -}; - -struct pblk_rb { - struct pblk_rb_entry *entries; /* Ring buffer entries */ - unsigned int mem; /* Write offset - points to next - * writable entry in memory - */ - unsigned int subm; /* Read offset - points to last entry - * that has been submitted to the media - * to be persisted - */ - unsigned int sync; /* Synced - backpointer that signals - * the last submitted entry that has - * been successfully persisted to media - */ - unsigned int flush_point; /* Sync point - last entry that must be - * flushed to the media. Used with - * REQ_FLUSH and REQ_FUA - */ - unsigned int l2p_update; /* l2p update point - next entry for - * which l2p mapping will be updated to - * contain a device ppa address (instead - * of a cacheline - */ - unsigned int nr_entries; /* Number of entries in write buffer - - * must be a power of two - */ - unsigned int seg_size; /* Size of the data segments being - * stored on each entry. Typically this - * will be 4KB - */ - - unsigned int back_thres; /* Threshold that shall be maintained by - * the backpointer in order to respect - * geo->mw_cunits on a per chunk basis - */ - - struct list_head pages; /* List of data pages */ - - spinlock_t w_lock; /* Write lock */ - spinlock_t s_lock; /* Sync lock */ - -#ifdef CONFIG_NVM_PBLK_DEBUG - atomic_t inflight_flush_point; /* Not served REQ_FLUSH | REQ_FUA */ -#endif -}; - -#define PBLK_RECOVERY_SECTORS 16 - -struct pblk_lun { - struct ppa_addr bppa; - struct semaphore wr_sem; -}; - -struct pblk_gc_rq { - struct pblk_line *line; - void *data; - u64 paddr_list[NVM_MAX_VLBA]; - u64 lba_list[NVM_MAX_VLBA]; - int nr_secs; - int secs_to_gc; - struct list_head list; -}; - -struct pblk_gc { - /* These states are not protected by a lock since (i) they are in the - * fast path, and (ii) they are not critical. - */ - int gc_active; - int gc_enabled; - int gc_forced; - - struct task_struct *gc_ts; - struct task_struct *gc_writer_ts; - struct task_struct *gc_reader_ts; - - struct workqueue_struct *gc_line_reader_wq; - struct workqueue_struct *gc_reader_wq; - - struct timer_list gc_timer; - - struct semaphore gc_sem; - atomic_t read_inflight_gc; /* Number of lines with inflight GC reads */ - atomic_t pipeline_gc; /* Number of lines in the GC pipeline - - * started reads to finished writes - */ - int w_entries; - - struct list_head w_list; - struct list_head r_list; - - spinlock_t lock; - spinlock_t w_lock; - spinlock_t r_lock; -}; - -struct pblk_rl { - unsigned int high; /* Upper threshold for rate limiter (free run - - * user I/O rate limiter - */ - unsigned int high_pw; /* High rounded up as a power of 2 */ - -#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */ -#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */ - - int rb_windows_pw; /* Number of rate windows in the write buffer - * given as a power-of-2. This guarantees that - * when user I/O is being rate limited, there - * will be reserved enough space for the GC to - * place its payload. A window is of - * pblk->max_write_pgs size, which in NVMe is - * 64, i.e., 256kb. 
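The offsets in struct pblk_rb above obey a strict producer/consumer ordering; as a picture:

	/*
	 * Entries advance (modulo nr_entries) through:
	 *
	 *   l2p_update <= sync <= subm <= mem
	 *
	 *   mem:        next entry a writer may fill
	 *   subm:       next entry to submit to the media
	 *   sync:       all entries before this are persisted
	 *   l2p_update: all entries before this have device ppas
	 *               (rather than cachelines) in the L2P map
	 *
	 * flush_point marks the newest entry a REQ_FLUSH/REQ_FUA is
	 * waiting on and always sits between sync and mem.
	 */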
- */ - int rb_budget; /* Total number of entries available for I/O */ - int rb_user_max; /* Max buffer entries available for user I/O */ - int rb_gc_max; /* Max buffer entries available for GC I/O */ - int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ - int rb_state; /* Rate-limiter current state */ - int rb_max_io; /* Maximum size for an I/O giving the config */ - - atomic_t rb_user_cnt; /* User I/O buffer counter */ - atomic_t rb_gc_cnt; /* GC I/O buffer counter */ - atomic_t rb_space; /* Space limit in case of reaching capacity */ - - int rsv_blocks; /* Reserved blocks for GC */ - - int rb_user_active; - int rb_gc_active; - - atomic_t werr_lines; /* Number of write error lines that needs gc */ - - struct timer_list u_timer; - - unsigned long total_blocks; - - atomic_t free_blocks; /* Total number of free blocks (+ OP) */ - atomic_t free_user_blocks; /* Number of user free blocks (no OP) */ -}; - -#define PBLK_LINE_EMPTY (~0U) - -enum { - /* Line Types */ - PBLK_LINETYPE_FREE = 0, - PBLK_LINETYPE_LOG = 1, - PBLK_LINETYPE_DATA = 2, - - /* Line state */ - PBLK_LINESTATE_NEW = 9, - PBLK_LINESTATE_FREE = 10, - PBLK_LINESTATE_OPEN = 11, - PBLK_LINESTATE_CLOSED = 12, - PBLK_LINESTATE_GC = 13, - PBLK_LINESTATE_BAD = 14, - PBLK_LINESTATE_CORRUPT = 15, - - /* GC group */ - PBLK_LINEGC_NONE = 20, - PBLK_LINEGC_EMPTY = 21, - PBLK_LINEGC_LOW = 22, - PBLK_LINEGC_MID = 23, - PBLK_LINEGC_HIGH = 24, - PBLK_LINEGC_FULL = 25, - PBLK_LINEGC_WERR = 26 -}; - -#define PBLK_MAGIC 0x70626c6b /*pblk*/ - -/* emeta/smeta persistent storage format versions: - * Changes in major version requires offline migration. - * Changes in minor version are handled automatically during - * recovery. - */ - -#define SMETA_VERSION_MAJOR (0) -#define SMETA_VERSION_MINOR (1) - -#define EMETA_VERSION_MAJOR (0) -#define EMETA_VERSION_MINOR (2) - -struct line_header { - __le32 crc; - __le32 identifier; /* pblk identifier */ - __u8 uuid[16]; /* instance uuid */ - __le16 type; /* line type */ - __u8 version_major; /* version major */ - __u8 version_minor; /* version minor */ - __le32 id; /* line id for current line */ -}; - -struct line_smeta { - struct line_header header; - - __le32 crc; /* Full structure including struct crc */ - /* Previous line metadata */ - __le32 prev_id; /* Line id for previous line */ - - /* Current line metadata */ - __le64 seq_nr; /* Sequence number for current line */ - - /* Active writers */ - __le32 window_wr_lun; /* Number of parallel LUNs to write */ - - __le32 rsvd[2]; - - __le64 lun_bitmap[]; -}; - - -/* - * Metadata layout in media: - * First sector: - * 1. struct line_emeta - * 2. bad block bitmap (u64 * window_wr_lun) - * 3. write amplification counters - * Mid sectors (start at lbas_sector): - * 3. nr_lbas (u64) forming lba list - * Last sectors (start at vsc_sector): - * 4. 
u32 valid sector count (vsc) for all lines (~0U: free line) - */ -struct line_emeta { - struct line_header header; - - __le32 crc; /* Full structure including struct crc */ - - /* Previous line metadata */ - __le32 prev_id; /* Line id for prev line */ - - /* Current line metadata */ - __le64 seq_nr; /* Sequence number for current line */ - - /* Active writers */ - __le32 window_wr_lun; /* Number of parallel LUNs to write */ - - /* Bookkeeping for recovery */ - __le32 next_id; /* Line id for next line */ - __le64 nr_lbas; /* Number of lbas mapped in line */ - __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ - __le64 bb_bitmap[]; /* Updated bad block bitmap for line */ -}; - - -/* Write amplification counters stored on media */ -struct wa_counters { - __le64 user; /* Number of user written sectors */ - __le64 gc; /* Number of sectors written by GC*/ - __le64 pad; /* Number of padded sectors */ -}; - -struct pblk_emeta { - struct line_emeta *buf; /* emeta buffer in media format */ - int mem; /* Write offset - points to next - * writable entry in memory - */ - atomic_t sync; /* Synced - backpointer that signals the - * last entry that has been successfully - * persisted to media - */ - unsigned int nr_entries; /* Number of emeta entries */ -}; - -struct pblk_smeta { - struct line_smeta *buf; /* smeta buffer in persistent format */ -}; - -struct pblk_w_err_gc { - int has_write_err; - int has_gc_err; - __le64 *lba_list; -}; - -struct pblk_line { - struct pblk *pblk; - unsigned int id; /* Line number corresponds to the - * block line - */ - unsigned int seq_nr; /* Unique line sequence number */ - - int state; /* PBLK_LINESTATE_X */ - int type; /* PBLK_LINETYPE_X */ - int gc_group; /* PBLK_LINEGC_X */ - struct list_head list; /* Free, GC lists */ - - unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ - - struct nvm_chk_meta *chks; /* Chunks forming line */ - - struct pblk_smeta *smeta; /* Start metadata */ - struct pblk_emeta *emeta; /* End medatada */ - - int meta_line; /* Metadata line id */ - int meta_distance; /* Distance between data and metadata */ - - u64 emeta_ssec; /* Sector where emeta starts */ - - unsigned int sec_in_line; /* Number of usable secs in line */ - - atomic_t blk_in_line; /* Number of good blocks in line */ - unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */ - unsigned long *erase_bitmap; /* Bitmap for erased blocks */ - - unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */ - unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */ - - atomic_t left_eblks; /* Blocks left for erasing */ - atomic_t left_seblks; /* Blocks left for sync erasing */ - - int left_msecs; /* Sectors left for mapping */ - unsigned int cur_sec; /* Sector map pointer */ - unsigned int nr_valid_lbas; /* Number of valid lbas in line */ - - __le32 *vsc; /* Valid sector count in line */ - - struct kref ref; /* Write buffer L2P references */ - atomic_t sec_to_update; /* Outstanding L2P updates to ppa */ - - struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */ - - spinlock_t lock; /* Necessary for invalid_bitmap only */ -}; - -#define PBLK_DATA_LINES 4 - -enum { - PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */ - PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */ - PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */ -}; - -struct pblk_line_mgmt { - int nr_lines; /* Total number of full lines */ - int nr_free_lines; /* Number of full lines in free list */ - - /* Free lists - use free_lock */ - 
struct list_head free_list; /* Full lines ready to use */ - struct list_head corrupt_list; /* Full lines corrupted */ - struct list_head bad_list; /* Full lines bad */ - - /* GC lists - use gc_lock */ - struct list_head *gc_lists[PBLK_GC_NR_LISTS]; - struct list_head gc_high_list; /* Full lines ready to GC, high isc */ - struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ - struct list_head gc_low_list; /* Full lines ready to GC, low isc */ - - struct list_head gc_werr_list; /* Write err recovery list */ - - struct list_head gc_full_list; /* Full lines ready to GC, no valid */ - struct list_head gc_empty_list; /* Full lines close, all valid */ - - struct pblk_line *log_line; /* Current FTL log line */ - struct pblk_line *data_line; /* Current data line */ - struct pblk_line *log_next; /* Next FTL log line */ - struct pblk_line *data_next; /* Next data line */ - - struct list_head emeta_list; /* Lines queued to schedule emeta */ - - __le32 *vsc_list; /* Valid sector counts for all lines */ - - /* Pre-allocated metadata for data lines */ - struct pblk_smeta *sline_meta[PBLK_DATA_LINES]; - struct pblk_emeta *eline_meta[PBLK_DATA_LINES]; - unsigned long meta_bitmap; - - /* Cache and mempool for map/invalid bitmaps */ - struct kmem_cache *bitmap_cache; - mempool_t *bitmap_pool; - - /* Helpers for fast bitmap calculations */ - unsigned long *bb_template; - unsigned long *bb_aux; - - unsigned long d_seq_nr; /* Data line unique sequence number */ - unsigned long l_seq_nr; /* Log line unique sequence number */ - - spinlock_t free_lock; - spinlock_t close_lock; - spinlock_t gc_lock; -}; - -struct pblk_line_meta { - unsigned int smeta_len; /* Total length for smeta */ - unsigned int smeta_sec; /* Sectors needed for smeta */ - - unsigned int emeta_len[4]; /* Lengths for emeta: - * [0]: Total - * [1]: struct line_emeta + - * bb_bitmap + struct wa_counters - * [2]: L2P portion - * [3]: vsc - */ - unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout - * as emeta_len - */ - - unsigned int emeta_bb; /* Boundary for bb that affects emeta */ - - unsigned int vsc_list_len; /* Length for vsc list */ - unsigned int sec_bitmap_len; /* Length for sector bitmap in line */ - unsigned int blk_bitmap_len; /* Length for block bitmap in line */ - unsigned int lun_bitmap_len; /* Length for lun bitmap in line */ - - unsigned int blk_per_line; /* Number of blocks in a full line */ - unsigned int sec_per_line; /* Number of sectors in a line */ - unsigned int dsec_per_line; /* Number of data sectors in a line */ - unsigned int min_blk_line; /* Min. 
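The list heads above encode the line lifecycle; condensed:

	/*
	 * free_list -> opened as the data (or log) line -> emeta_list
	 * while end metadata drains -> one of the gc_*_lists, bucketed
	 * by valid-sector count -> GC rewrites the remaining valid
	 * data and pblk_line_put() returns the line to free_list.
	 *
	 * bad_list and corrupt_list hold lines that fall out of this
	 * cycle; gc_werr_list feeds write-error lines back through GC
	 * for recovery.
	 */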
number of good blocks in line */ - - unsigned int mid_thrs; /* Threshold for GC mid list */ - unsigned int high_thrs; /* Threshold for GC high list */ - - unsigned int meta_distance; /* Distance between data and metadata */ -}; - -enum { - PBLK_STATE_RUNNING = 0, - PBLK_STATE_STOPPING = 1, - PBLK_STATE_RECOVERING = 2, - PBLK_STATE_STOPPED = 3, -}; - -/* Internal format to support not power-of-2 device formats */ -struct pblk_addrf { - /* gen to dev */ - int sec_stripe; - int ch_stripe; - int lun_stripe; - - /* dev to gen */ - int sec_lun_stripe; - int sec_ws_stripe; -}; - -struct pblk { - struct nvm_tgt_dev *dev; - struct gendisk *disk; - - struct kobject kobj; - - struct pblk_lun *luns; - - struct pblk_line *lines; /* Line array */ - struct pblk_line_mgmt l_mg; /* Line management */ - struct pblk_line_meta lm; /* Line metadata */ - - struct nvm_addrf addrf; /* Aligned address format */ - struct pblk_addrf uaddrf; /* Unaligned address format */ - int addrf_len; - - struct pblk_rb rwb; - - int state; /* pblk line state */ - - int min_write_pgs; /* Minimum amount of pages required by controller */ - int min_write_pgs_data; /* Minimum amount of payload pages */ - int max_write_pgs; /* Maximum amount of pages supported by controller */ - int oob_meta_size; /* Size of OOB sector metadata */ - - sector_t capacity; /* Device capacity when bad blocks are subtracted */ - - int op; /* Percentage of device used for over-provisioning */ - int op_blks; /* Number of blocks used for over-provisioning */ - - /* pblk provisioning values. Used by rate limiter */ - struct pblk_rl rl; - - int sec_per_write; - - guid_t instance_uuid; - - /* Persistent write amplification counters, 4kb sector I/Os */ - atomic64_t user_wa; /* Sectors written by user */ - atomic64_t gc_wa; /* Sectors written by GC */ - atomic64_t pad_wa; /* Padded sectors written */ - - /* Reset values for delta write amplification measurements */ - u64 user_rst_wa; - u64 gc_rst_wa; - u64 pad_rst_wa; - - /* Counters used for calculating padding distribution */ - atomic64_t *pad_dist; /* Padding distribution buckets */ - u64 nr_flush_rst; /* Flushes reset value for pad dist.*/ - atomic64_t nr_flush; /* Number of flush/fua I/O */ - -#ifdef CONFIG_NVM_PBLK_DEBUG - /* Non-persistent debug counters, 4kb sector I/Os */ - atomic_long_t inflight_writes; /* Inflight writes (user and gc) */ - atomic_long_t padded_writes; /* Sectors padded due to flush/fua */ - atomic_long_t padded_wb; /* Sectors padded in write buffer */ - atomic_long_t req_writes; /* Sectors stored on write buffer */ - atomic_long_t sub_writes; /* Sectors submitted from buffer */ - atomic_long_t sync_writes; /* Sectors synced to media */ - atomic_long_t inflight_reads; /* Inflight sector read requests */ - atomic_long_t cache_reads; /* Read requests that hit the cache */ - atomic_long_t sync_reads; /* Completed sector read requests */ - atomic_long_t recov_writes; /* Sectors submitted from recovery */ - atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */ - atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */ -#endif - - spinlock_t lock; - - atomic_long_t read_failed; - atomic_long_t read_empty; - atomic_long_t read_high_ecc; - atomic_long_t read_failed_gc; - atomic_long_t write_failed; - atomic_long_t erase_failed; - - atomic_t inflight_io; /* General inflight I/O counter */ - - struct task_struct *writer_ts; - - /* Simple translation map of logical addresses to physical addresses. 
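A sketch of how this map is read, pairing the pblk_trans_map_get() helper with trans_lock exactly as the resubmit path in pblk-write.c above does (the wrapper name is illustrative):

	static struct ppa_addr l2p_lookup(struct pblk *pblk, sector_t lba)
	{
		struct ppa_addr ppa;

		spin_lock(&pblk->trans_lock);
		ppa = pblk_trans_map_get(pblk, lba);
		spin_unlock(&pblk->trans_lock);

		return ppa;	/* a device ppa, or a write-buffer cacheline */
	}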
- * The logical addresses are known by the host system, while the physical - * addresses are used when writing to the disk block device. - */ - unsigned char *trans_map; - spinlock_t trans_lock; - - struct list_head compl_list; - - spinlock_t resubmit_lock; /* Resubmit list lock */ - struct list_head resubmit_list; /* Resubmit list for failed writes */ - - mempool_t page_bio_pool; - mempool_t gen_ws_pool; - mempool_t rec_pool; - mempool_t r_rq_pool; - mempool_t w_rq_pool; - mempool_t e_rq_pool; - - struct workqueue_struct *close_wq; - struct workqueue_struct *bb_wq; - struct workqueue_struct *r_end_wq; - - struct timer_list wtimer; - - struct pblk_gc gc; -}; - -struct pblk_line_ws { - struct pblk *pblk; - struct pblk_line *line; - void *priv; - struct work_struct ws; -}; - -#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx)) -#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx)) - -#define pblk_err(pblk, fmt, ...) \ - pr_err("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) -#define pblk_info(pblk, fmt, ...) \ - pr_info("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) -#define pblk_warn(pblk, fmt, ...) \ - pr_warn("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) -#define pblk_debug(pblk, fmt, ...) \ - pr_debug("pblk %s: " fmt, pblk->disk->disk_name, ##__VA_ARGS__) - -/* - * pblk ring buffer operations - */ -int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, - unsigned int seg_sz); -int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio, - unsigned int nr_entries, unsigned int *pos); -int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, - unsigned int *pos); -void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, unsigned int pos); -void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *line, - u64 paddr, unsigned int pos); -struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); -void pblk_rb_flush(struct pblk_rb *rb); - -void pblk_rb_sync_l2p(struct pblk_rb *rb); -unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - unsigned int pos, unsigned int nr_entries, - unsigned int count); -int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, - struct ppa_addr ppa); -unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); - -unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); -unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); -unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, - unsigned int nr_entries); -void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); -unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); - -unsigned int pblk_rb_read_count(struct pblk_rb *rb); -unsigned int pblk_rb_sync_count(struct pblk_rb *rb); -unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); - -int pblk_rb_tear_down_check(struct pblk_rb *rb); -int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos); -void pblk_rb_free(struct pblk_rb *rb); -ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); - -/* - * pblk core - */ -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); -int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); -int
pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx); -void pblk_discard(struct pblk *pblk, struct bio *bio); -struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk); -struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, - struct nvm_chk_meta *lp, - struct ppa_addr ppa); -void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); -int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf); -int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf); -int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); -void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd); -struct pblk_line *pblk_line_get(struct pblk *pblk); -struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); -struct pblk_line *pblk_line_replace_data(struct pblk *pblk); -void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa); -void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd); -int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); -void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); -struct pblk_line *pblk_line_get_data(struct pblk *pblk); -struct pblk_line *pblk_line_get_erase(struct pblk *pblk); -int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); -int pblk_line_is_full(struct pblk_line *line); -void pblk_line_free(struct pblk_line *line); -void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); -void pblk_line_close(struct pblk *pblk, struct pblk_line *line); -void pblk_line_close_ws(struct work_struct *work); -void pblk_pipeline_stop(struct pblk *pblk); -void __pblk_pipeline_stop(struct pblk *pblk); -void __pblk_pipeline_flush(struct pblk *pblk); -void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), gfp_t gfp_mask, - struct workqueue_struct *wq); -u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); -int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line); -int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, - void *emeta_buf); -int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); -void pblk_line_put(struct kref *ref); -void pblk_line_put_wq(struct kref *ref); -struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); -u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line); -void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); -int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush, bool skip_meta); -void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, - unsigned long *lun_bitmap); -void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); -void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa); -void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap); -int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, - int nr_pages); -void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, - int nr_pages); -void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa); -void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, - u64 paddr); -void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa); -void 
pblk_update_map_cache(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa); -void pblk_update_map_dev(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa, struct ppa_addr entry_line); -int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct pblk_line *gc_line, u64 paddr); -void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, - u64 *lba_list, int nr_secs); -int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, - sector_t blba, int nr_secs, bool *from_cache); -void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); -void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); - -/* - * pblk user I/O write path - */ -void pblk_write_to_cache(struct pblk *pblk, struct bio *bio, - unsigned long flags); -int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); - -/* - * pblk map - */ -int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int sentry, unsigned long *lun_bitmap, - unsigned int valid_secs, struct ppa_addr *erase_ppa); -int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, - unsigned long *lun_bitmap, unsigned int valid_secs, - unsigned int off); - -/* - * pblk write thread - */ -int pblk_write_ts(void *data); -void pblk_write_timer_fn(struct timer_list *t); -void pblk_write_should_kick(struct pblk *pblk); -void pblk_write_kick(struct pblk *pblk); - -/* - * pblk read path - */ -extern struct bio_set pblk_bio_set; -void pblk_submit_read(struct pblk *pblk, struct bio *bio); -int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); -/* - * pblk recovery - */ -struct pblk_line *pblk_recov_l2p(struct pblk *pblk); -int pblk_recov_pad(struct pblk *pblk); -int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); - -/* - * pblk gc - */ -#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */ -#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */ -#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */ - -int pblk_gc_init(struct pblk *pblk); -void pblk_gc_exit(struct pblk *pblk, bool graceful); -void pblk_gc_should_start(struct pblk *pblk); -void pblk_gc_should_stop(struct pblk *pblk); -void pblk_gc_should_kick(struct pblk *pblk); -void pblk_gc_free_full_lines(struct pblk *pblk); -void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, - int *gc_active); -int pblk_gc_sysfs_force(struct pblk *pblk, int force); -void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line); - -/* - * pblk rate limiter - */ -void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold); -void pblk_rl_free(struct pblk_rl *rl); -void pblk_rl_update_rates(struct pblk_rl *rl); -int pblk_rl_high_thrs(struct pblk_rl *rl); -unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); -unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl); -int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); -void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); -void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); -int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); -void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); -void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); -int pblk_rl_max_io(struct pblk_rl *rl); -void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); -void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, - bool used); -int pblk_rl_is_limit(struct pblk_rl *rl); - -void pblk_rl_werr_line_in(struct pblk_rl *rl); 
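The pblk_rl_* prototypes around this point make up pblk's rate limiter, which splits the write-buffer budget between user I/O and garbage-collection I/O so that GC still makes progress as free lines run low. As a rough illustration of the admission side only — a hypothetical sketch with simplified stand-in types, not the pblk implementation (the real policy lives in pblk-rl.c and rescales the budgets from the free-line counters) — an insertion check has this shape:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical, simplified stand-in for struct pblk_rl. */
struct rl_sketch {
	atomic_int rb_user_cnt;	/* user entries currently in the write buffer */
	atomic_int rb_gc_cnt;	/* GC entries currently in the write buffer */
	int rb_user_max;	/* current user I/O budget */
	int rb_gc_max;		/* current GC I/O budget */
};

/* Admit user I/O only while it stays within the user budget. */
static bool rl_user_may_insert(struct rl_sketch *rl, int nr_entries)
{
	return atomic_load(&rl->rb_user_cnt) + nr_entries <= rl->rb_user_max;
}

/* GC draws from a separate budget, so it is not starved by user writes. */
static bool rl_gc_may_insert(struct rl_sketch *rl, int nr_entries)
{
	return atomic_load(&rl->rb_gc_cnt) + nr_entries <= rl->rb_gc_max;
}

The exported pblk_rl_user_may_insert()/pblk_rl_gc_may_insert() pair above appears to follow the same gatekeeping pattern, with pblk_rl_update_rates() recomputing the budgets as lines are freed and consumed.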
-void pblk_rl_werr_line_out(struct pblk_rl *rl); - -/* - * pblk sysfs - */ -int pblk_sysfs_init(struct gendisk *tdisk); -void pblk_sysfs_exit(struct gendisk *tdisk); - -static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx) -{ - return c_ctx - sizeof(struct nvm_rq); -} - -static inline void *emeta_to_bb(struct line_emeta *emeta) -{ - return emeta->bb_bitmap; -} - -static inline void *emeta_to_wa(struct pblk_line_meta *lm, - struct line_emeta *emeta) -{ - return emeta->bb_bitmap + lm->blk_bitmap_len; -} - -static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta) -{ - return ((void *)emeta + pblk->lm.emeta_len[1]); -} - -static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta) -{ - return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]); -} - -static inline int pblk_line_vsc(struct pblk_line *line) -{ - return le32_to_cpu(*line->vsc); -} - -static inline int pblk_ppa_to_line_id(struct ppa_addr p) -{ - return p.a.blk; -} - -static inline struct pblk_line *pblk_ppa_to_line(struct pblk *pblk, - struct ppa_addr p) -{ - return &pblk->lines[pblk_ppa_to_line_id(p)]; -} - -static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) -{ - return p.a.lun * geo->num_ch + p.a.ch; -} - -static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, - u64 line_id) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; - - ppa.ppa = 0; - ppa.g.blk = line_id; - ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset; - ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset; - ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset; - ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset; - ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset; - } else { - struct pblk_addrf *uaddrf = &pblk->uaddrf; - int secs, chnls, luns; - - ppa.ppa = 0; - - ppa.m.chk = line_id; - - paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs); - ppa.m.sec = secs; - - paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls); - ppa.m.grp = chnls; - - paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns); - ppa.m.pu = luns; - - ppa.m.sec += uaddrf->sec_stripe * paddr; - } - - return ppa; -} - -static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk, - struct ppa_addr p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line *line = pblk_ppa_to_line(pblk, p); - int pos = pblk_ppa_to_pos(geo, p); - - return &line->chks[pos]; -} - -static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk, - struct ppa_addr p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - return dev_to_chunk_addr(dev->parent, &pblk->addrf, p); -} - -static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, - struct ppa_addr p) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - u64 paddr; - - if (geo->version == NVM_OCSSD_SPEC_12) { - struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; - - paddr = (u64)p.g.ch << ppaf->ch_offset; - paddr |= (u64)p.g.lun << ppaf->lun_offset; - paddr |= (u64)p.g.pg << ppaf->pg_offset; - paddr |= (u64)p.g.pl << ppaf->pln_offset; - paddr |= (u64)p.g.sec << ppaf->sec_offset; - } else { - struct pblk_addrf *uaddrf = &pblk->uaddrf; - u64 secs = p.m.sec; - int sec_stripe; - - paddr = (u64)p.m.grp * uaddrf->sec_stripe; - paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe; - - secs = div_u64_rem(secs, 
uaddrf->sec_stripe, &sec_stripe); - paddr += secs * uaddrf->sec_ws_stripe; - paddr += sec_stripe; - } - - return paddr; -} - -static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32); -} - -static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) -{ - struct nvm_tgt_dev *dev = pblk->dev; - - return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64); -} - -static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, - sector_t lba) -{ - struct ppa_addr ppa; - - if (pblk->addrf_len < 32) { - u32 *map = (u32 *)pblk->trans_map; - - ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); - } else { - struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; - - ppa = map[lba]; - } - - return ppa; -} - -static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, - struct ppa_addr ppa) -{ - if (pblk->addrf_len < 32) { - u32 *map = (u32 *)pblk->trans_map; - - map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); - } else { - u64 *map = (u64 *)pblk->trans_map; - - map[lba] = ppa.ppa; - } -} - -static inline int pblk_ppa_empty(struct ppa_addr ppa_addr) -{ - return (ppa_addr.ppa == ADDR_EMPTY); -} - -static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr) -{ - ppa_addr->ppa = ADDR_EMPTY; -} - -static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa) -{ - return (lppa.ppa == rppa.ppa); -} - -static inline int pblk_addr_in_cache(struct ppa_addr ppa) -{ - return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached); -} - -static inline int pblk_addr_to_cacheline(struct ppa_addr ppa) -{ - return ppa.c.line; -} - -static inline struct ppa_addr pblk_cacheline_to_addr(int addr) -{ - struct ppa_addr p; - - p.c.line = addr; - p.c.is_cached = 1; - - return p; -} - -static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, - struct line_header *header) -{ - u32 crc = ~(u32)0; - - crc = crc32_le(crc, (unsigned char *)header + sizeof(crc), - sizeof(struct line_header) - sizeof(crc)); - - return crc; -} - -static inline u32 pblk_calc_smeta_crc(struct pblk *pblk, - struct line_smeta *smeta) -{ - struct pblk_line_meta *lm = &pblk->lm; - u32 crc = ~(u32)0; - - crc = crc32_le(crc, (unsigned char *)smeta + - sizeof(struct line_header) + sizeof(crc), - lm->smeta_len - - sizeof(struct line_header) - sizeof(crc)); - - return crc; -} - -static inline u32 pblk_calc_emeta_crc(struct pblk *pblk, - struct line_emeta *emeta) -{ - struct pblk_line_meta *lm = &pblk->lm; - u32 crc = ~(u32)0; - - crc = crc32_le(crc, (unsigned char *)emeta + - sizeof(struct line_header) + sizeof(crc), - lm->emeta_len[0] - - sizeof(struct line_header) - sizeof(crc)); - - return crc; -} - -static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs) -{ - return !(nr_secs % pblk->min_write_pgs); -} - -#ifdef CONFIG_NVM_PBLK_DEBUG -static inline void print_ppa(struct pblk *pblk, struct ppa_addr *p, - char *msg, int error) -{ - struct nvm_geo *geo = &pblk->dev->geo; - - if (p->c.is_cached) { - pblk_err(pblk, "ppa: (%s: %x) cache line: %llu\n", - msg, error, (u64)p->c.line); - } else if (geo->version == NVM_OCSSD_SPEC_12) { - pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n", - msg, error, - p->g.ch, p->g.lun, p->g.blk, - p->g.pg, p->g.pl, p->g.sec); - } else { - pblk_err(pblk, "ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n", - msg, error, - p->m.grp, p->m.pu, p->m.chk, p->m.sec); - } -} - -static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq 
*rqd, - int error) -{ - int bit = -1; - - if (rqd->nr_ppas == 1) { - print_ppa(pblk, &rqd->ppa_addr, "rqd", error); - return; - } - - while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, - bit + 1)) < rqd->nr_ppas) { - print_ppa(pblk, &rqd->ppa_list[bit], "rqd", error); - } - - pblk_err(pblk, "error:%d, ppa_status:%llx\n", error, rqd->ppa_status); -} - -static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr *ppas, int nr_ppas) -{ - struct nvm_geo *geo = &tgt_dev->geo; - struct ppa_addr *ppa; - int i; - - for (i = 0; i < nr_ppas; i++) { - ppa = &ppas[i]; - - if (geo->version == NVM_OCSSD_SPEC_12) { - if (!ppa->c.is_cached && - ppa->g.ch < geo->num_ch && - ppa->g.lun < geo->num_lun && - ppa->g.pl < geo->num_pln && - ppa->g.blk < geo->num_chk && - ppa->g.pg < geo->num_pg && - ppa->g.sec < geo->ws_min) - continue; - } else { - if (!ppa->c.is_cached && - ppa->m.grp < geo->num_ch && - ppa->m.pu < geo->num_lun && - ppa->m.chk < geo->num_chk && - ppa->m.sec < geo->clba) - continue; - } - - print_ppa(tgt_dev->q->queuedata, ppa, "boundary", i); - - return 1; - } - return 0; -} - -static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); - - if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { - WARN_ON(1); - return -EINVAL; - } - - if (rqd->opcode == NVM_OP_PWRITE) { - struct pblk_line *line; - int i; - - for (i = 0; i < rqd->nr_ppas; i++) { - line = pblk_ppa_to_line(pblk, ppa_list[i]); - - spin_lock(&line->lock); - if (line->state != PBLK_LINESTATE_OPEN) { - pblk_err(pblk, "bad ppa: line:%d,state:%d\n", - line->id, line->state); - WARN_ON(1); - spin_unlock(&line->lock); - return -EINVAL; - } - spin_unlock(&line->lock); - } - } - - return 0; -} -#endif - -static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr) -{ - struct pblk_line_meta *lm = &pblk->lm; - - if (paddr > lm->sec_per_line) - return 1; - - return 0; -} - -static inline unsigned int pblk_get_bi_idx(struct bio *bio) -{ - return bio->bi_iter.bi_idx; -} - -static inline sector_t pblk_get_lba(struct bio *bio) -{ - return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; -} - -static inline unsigned int pblk_get_secs(struct bio *bio) -{ - return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE; -} - -static inline char *pblk_disk_name(struct pblk *pblk) -{ - struct gendisk *disk = pblk->disk; - - return disk->disk_name; -} - -static inline unsigned int pblk_get_min_chks(struct pblk *pblk) -{ - struct pblk_line_meta *lm = &pblk->lm; - /* In a worst-case scenario every line will have OP invalid sectors. 
- * We will then need a minimum of 1/OP lines to free up a single line - */ - - return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line; -} - -static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk, - void *meta, int index) -{ - return meta + - max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) - * index; -} - -static inline int pblk_dma_meta_size(struct pblk *pblk) -{ - return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) - * NVM_MAX_VLBA; -} - -static inline int pblk_is_oob_meta_supported(struct pblk *pblk) -{ - return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta); -} -#endif /* PBLK_H_ */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3c44c4bb40fc..19598bd38939 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1329,6 +1329,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, struct raid1_plug_cb *plug = NULL; int first_clone; int max_sectors; + bool write_behind = false; if (mddev_is_clustered(mddev) && md_cluster_ops->area_resyncing(mddev, WRITE, @@ -1381,6 +1382,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); + + /* + * The write-behind io is only attempted on drives marked as + * write-mostly, which means we may allocate a write-behind + * bio later. + */ + if (rdev && test_bit(WriteMostly, &rdev->flags)) + write_behind = true; + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { atomic_inc(&rdev->nr_pending); blocked_rdev = rdev; @@ -1454,6 +1464,15 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, goto retry_write; } + /* + * When using a bitmap, we may call alloc_behind_master_bio below. + * alloc_behind_master_bio allocates a copy of the data payload a page + * at a time and thus needs a new bio that can fit the whole payload of + * this bio in page-sized chunks. + */ + if (write_behind && bitmap) + max_sectors = min_t(int, max_sectors, + BIO_MAX_VECS * (PAGE_SIZE >> 9)); if (max_sectors < bio_sectors(bio)) { struct bio *split = bio_split(bio, max_sectors, GFP_NOIO, &conf->bio_split); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 07119d7e0fdf..aa2636582841 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1712,6 +1712,11 @@ retry_discard: } else r10_bio->master_bio = (struct bio *)first_r10bio; + /* + * First select target devices under rcu_read_lock and + * increment the refcount on their rdev. Record them by setting + * bios[x] to bio + */ rcu_read_lock(); for (disk = 0; disk < geo->raid_disks; disk++) { struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); @@ -1743,9 +1748,6 @@ retry_discard: for (disk = 0; disk < geo->raid_disks; disk++) { sector_t dev_start, dev_end; struct bio *mbio, *rbio = NULL; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); - struct md_rdev *rrdev = rcu_dereference( - conf->mirrors[disk].replacement); /* * Now start to calculate the start and end address for each disk. @@ -1775,9 +1777,12 @@ retry_discard: /* * It only handles discard bios whose size is >= stripe size, so - * dev_end > dev_start all the time + * dev_end > dev_start all the time. + * We don't need the rcu lock to get the rdev here; we already + * incremented rdev->nr_pending in the first loop.
*/ if (r10_bio->devs[disk].bio) { + struct md_rdev *rdev = conf->mirrors[disk].rdev; mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); mbio->bi_end_io = raid10_end_discard_request; mbio->bi_private = r10_bio; @@ -1790,6 +1795,7 @@ retry_discard: bio_endio(mbio); } if (r10_bio->devs[disk].repl_bio) { + struct md_rdev *rrdev = conf->mirrors[disk].replacement; rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); rbio->bi_end_io = raid10_end_discard_request; rbio->bi_private = r10_bio; diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index c3f3d77f1aac..dc0450ca23a3 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -33,12 +33,12 @@ config NVME_HWMON in the system. config NVME_FABRICS + select NVME_CORE tristate config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK - select NVME_CORE select NVME_FABRICS select SG_POOL help @@ -55,7 +55,6 @@ config NVME_FC tristate "NVM Express over Fabrics FC host driver" depends on BLOCK depends on HAS_DMA - select NVME_CORE select NVME_FABRICS select SG_POOL help @@ -72,7 +71,6 @@ config NVME_TCP tristate "NVM Express over Fabrics TCP host driver" depends on INET depends on BLOCK - select NVME_CORE select NVME_FABRICS select CRYPTO select CRYPTO_CRC32C diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index cbc509784b2e..dfaacd472e5d 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o nvme-core-y := core.o ioctl.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o -nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 68acd33c3856..8679a108f571 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); - if (ns->ndev) - nvme_nvm_unregister(ns); - put_disk(ns->disk); nvme_put_ns_head(ns->head); nvme_put_ctrl(ns->ctrl); @@ -1028,7 +1025,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) return BLK_STS_IOERR; } - cmd->common.command_id = req->tag; + nvme_req(req)->genctr++; + cmd->common.command_id = nvme_cid(req); trace_nvme_setup_cmd(req, cmd); return ret; } @@ -3217,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = { const struct attribute_group *nvme_ns_id_attr_groups[] = { &nvme_ns_id_attr_group, -#ifdef CONFIG_NVM - &nvme_nvm_attr_group, -#endif NULL, }; @@ -3762,13 +3757,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (nvme_update_ns_info(ns, id)) goto out_unlink_ns; - if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { - if (nvme_nvm_register(ns, disk->disk_name, node)) { - dev_warn(ctrl->device, "LightNVM init failure\n"); - goto out_unlink_ns; - } - } - down_write(&ctrl->namespaces_rwsem); list_add_tail(&ns->list, &ctrl->namespaces); up_write(&ctrl->namespaces_rwsem); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index a5469fd9d4c3..668c6bb7a567 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -719,7 +719,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } - nvmf_host_put(opts->host); 
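Stepping back to the nvme core.c hunk above: nvme_setup_cmd() now bumps a per-request generation counter and derives the wire command identifier from it via nvme_cid(), whose definition appears in the nvme.h hunk near the end of this diff. The controller echoes command_id in its completion, so a 4-bit generation rides in the top bits next to the 12-bit blk-mq tag, and a duplicate or stale completion whose generation no longer matches can be rejected instead of completing an unrelated request that has since reused the tag. A standalone model of the encoding — hypothetical names in plain C, not the kernel helpers:

#include <stdint.h>
#include <stdio.h>

/* Model of the command_id layout: 4-bit generation | 12-bit blk-mq tag. */
#define CID_GEN_SHIFT	12
#define CID_GEN_MASK	0xfu
#define CID_TAG_MASK	0xfffu

static uint16_t cid_encode(uint8_t genctr, uint16_t tag)
{
	return (uint16_t)(((genctr & CID_GEN_MASK) << CID_GEN_SHIFT) |
			  (tag & CID_TAG_MASK));
}

/* A completion is trusted only if its generation matches the live request. */
static int cid_matches(uint16_t cid, uint8_t live_genctr)
{
	return ((cid >> CID_GEN_SHIFT) & CID_GEN_MASK) ==
	       (live_genctr & CID_GEN_MASK);
}

int main(void)
{
	uint16_t cid = cid_encode(3, 0x2a);	/* generation 3, tag 0x02a */

	printf("cid=%#x match=%d stale=%d\n", cid,
	       cid_matches(cid, 3), cid_matches(cid, 4));	/* 1, then 0 */
	return 0;
}

In the real header, nvme_find_rq() performs this generation check before completing a request, while nvme_cid_to_rq() only strips the tag bits.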
opts->host = nvmf_host_add(p); kfree(p); if (!opts->host) { diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 305ddd415e45..22314962842d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_IO64_CMD: return nvme_user_cmd64(ns->ctrl, ns, argp); default: - if (!ns->ndev) - return -ENOTTY; - return nvme_nvm_ioctl(ns, cmd, argp); + return -ENOTTY; } } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c deleted file mode 100644 index e9d9ad47f70f..000000000000 --- a/drivers/nvme/host/lightnvm.c +++ /dev/null @@ -1,1274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * nvme-lightnvm.c - LightNVM NVMe device - * - * Copyright (C) 2014-2015 IT University of Copenhagen - * Initial release: Matias Bjorling <mb@lightnvm.io> - */ - -#include "nvme.h" - -#include <linux/nvme.h> -#include <linux/bitops.h> -#include <linux/lightnvm.h> -#include <linux/vmalloc.h> -#include <linux/sched/sysctl.h> -#include <uapi/linux/lightnvm.h> - -enum nvme_nvm_admin_opcode { - nvme_nvm_admin_identity = 0xe2, - nvme_nvm_admin_get_bb_tbl = 0xf2, - nvme_nvm_admin_set_bb_tbl = 0xf1, -}; - -enum nvme_nvm_log_page { - NVME_NVM_LOG_REPORT_CHUNK = 0xca, -}; - -struct nvme_nvm_ph_rw { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - -struct nvme_nvm_erase_blk { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - -struct nvme_nvm_identity { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __u32 rsvd11[6]; -}; - -struct nvme_nvm_getbbtbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __u32 rsvd4[4]; -}; - -struct nvme_nvm_setbbtbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 nlb; - __u8 value; - __u8 rsvd3; - __u32 rsvd4[3]; -}; - -struct nvme_nvm_command { - union { - struct nvme_common_command common; - struct nvme_nvm_ph_rw ph_rw; - struct nvme_nvm_erase_blk erase; - struct nvme_nvm_identity identity; - struct nvme_nvm_getbbtbl get_bb; - struct nvme_nvm_setbbtbl set_bb; - }; -}; - -struct nvme_nvm_id12_grp { - __u8 mtype; - __u8 fmtype; - __le16 res16; - __u8 num_ch; - __u8 num_lun; - __u8 num_pln; - __u8 rsvd1; - __le16 num_chk; - __le16 num_pg; - __le16 fpg_sz; - __le16 csecs; - __le16 sos; - __le16 rsvd2; - __le32 trdt; - __le32 trdm; - __le32 tprt; - __le32 tprm; - __le32 tbet; - __le32 tbem; - __le32 mpos; - __le32 mccap; - __le16 cpar; - __u8 reserved[906]; -} __packed; - -struct nvme_nvm_id12_addrf { - __u8 ch_offset; - __u8 ch_len; - __u8 lun_offset; - __u8 lun_len; - __u8 pln_offset; - __u8 pln_len; - __u8 blk_offset; - __u8 blk_len; - __u8 pg_offset; - __u8 pg_len; - __u8 sec_offset; - __u8 sec_len; - __u8 res[4]; -} __packed; - -struct nvme_nvm_id12 { - __u8 ver_id; - __u8 vmnt; - __u8 cgrps; - __u8 res; - __le32 cap; - __le32 dom; - struct nvme_nvm_id12_addrf ppaf; - __u8 resv[228]; - struct nvme_nvm_id12_grp grp; - __u8 resv2[2880]; -} __packed; - -struct nvme_nvm_bb_tbl { - __u8 tblid[4]; - __le16 verid; - 
__le16 revid; - __le32 rvsd1; - __le32 tblks; - __le32 tfact; - __le32 tgrown; - __le32 tdresv; - __le32 thresv; - __le32 rsvd2[8]; - __u8 blk[]; -}; - -struct nvme_nvm_id20_addrf { - __u8 grp_len; - __u8 pu_len; - __u8 chk_len; - __u8 lba_len; - __u8 resv[4]; -}; - -struct nvme_nvm_id20 { - __u8 mjr; - __u8 mnr; - __u8 resv[6]; - - struct nvme_nvm_id20_addrf lbaf; - - __le32 mccap; - __u8 resv2[12]; - - __u8 wit; - __u8 resv3[31]; - - /* Geometry */ - __le16 num_grp; - __le16 num_pu; - __le32 num_chk; - __le32 clba; - __u8 resv4[52]; - - /* Write data requirements */ - __le32 ws_min; - __le32 ws_opt; - __le32 mw_cunits; - __le32 maxoc; - __le32 maxocpu; - __u8 resv5[44]; - - /* Performance related metrics */ - __le32 trdt; - __le32 trdm; - __le32 twrt; - __le32 twrm; - __le32 tcrst; - __le32 tcrsm; - __u8 resv6[40]; - - /* Reserved area */ - __u8 resv7[2816]; - - /* Vendor specific */ - __u8 vs[1024]; -}; - -struct nvme_nvm_chk_meta { - __u8 state; - __u8 type; - __u8 wi; - __u8 rsvd[5]; - __le64 slba; - __le64 cnlb; - __le64 wp; -}; - -/* - * Check we didn't inadvertently grow the command struct - */ -static inline void _nvme_nvm_check_size(void) -{ - BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32); - BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != - sizeof(struct nvm_chk_meta)); -} - -static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst, - struct nvme_nvm_id12_addrf *src) -{ - dst->ch_len = src->ch_len; - dst->lun_len = src->lun_len; - dst->blk_len = src->blk_len; - dst->pg_len = src->pg_len; - dst->pln_len = src->pln_len; - dst->sec_len = src->sec_len; - - dst->ch_offset = src->ch_offset; - dst->lun_offset = src->lun_offset; - dst->blk_offset = src->blk_offset; - dst->pg_offset = src->pg_offset; - dst->pln_offset = src->pln_offset; - dst->sec_offset = src->sec_offset; - - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; - dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; - dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; -} - -static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id, - struct nvm_geo *geo) -{ - struct nvme_nvm_id12_grp *src; - int sec_per_pg, sec_per_pl, pg_per_blk; - - if (id->cgrps != 1) - return -EINVAL; - - src = &id->grp; - - if (src->mtype != 0) { - pr_err("nvm: memory type not supported\n"); - return -EINVAL; - } - - /* 1.2 spec. 
only reports a single version id - unfold */ - geo->major_ver_id = id->ver_id; - geo->minor_ver_id = 2; - - /* Set compacted version for upper layers */ - geo->version = NVM_OCSSD_SPEC_12; - - geo->num_ch = src->num_ch; - geo->num_lun = src->num_lun; - geo->all_luns = geo->num_ch * geo->num_lun; - - geo->num_chk = le16_to_cpu(src->num_chk); - - geo->csecs = le16_to_cpu(src->csecs); - geo->sos = le16_to_cpu(src->sos); - - pg_per_blk = le16_to_cpu(src->num_pg); - sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs; - sec_per_pl = sec_per_pg * src->num_pln; - geo->clba = sec_per_pl * pg_per_blk; - - geo->all_chunks = geo->all_luns * geo->num_chk; - geo->total_secs = geo->clba * geo->all_chunks; - - geo->ws_min = sec_per_pg; - geo->ws_opt = sec_per_pg; - geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */ - - /* Do not impose values for maximum number of open blocks as it is - * unspecified in 1.2. Users of 1.2 must be aware of this and eventually - * specify these values through a quirk if restrictions apply. - */ - geo->maxoc = geo->all_luns * geo->num_chk; - geo->maxocpu = geo->num_chk; - - geo->mccap = le32_to_cpu(src->mccap); - - geo->trdt = le32_to_cpu(src->trdt); - geo->trdm = le32_to_cpu(src->trdm); - geo->tprt = le32_to_cpu(src->tprt); - geo->tprm = le32_to_cpu(src->tprm); - geo->tbet = le32_to_cpu(src->tbet); - geo->tbem = le32_to_cpu(src->tbem); - - /* 1.2 compatibility */ - geo->vmnt = id->vmnt; - geo->cap = le32_to_cpu(id->cap); - geo->dom = le32_to_cpu(id->dom); - - geo->mtype = src->mtype; - geo->fmtype = src->fmtype; - - geo->cpar = le16_to_cpu(src->cpar); - geo->mpos = le32_to_cpu(src->mpos); - - geo->pln_mode = NVM_PLANE_SINGLE; - - if (geo->mpos & 0x020202) { - geo->pln_mode = NVM_PLANE_DOUBLE; - geo->ws_opt <<= 1; - } else if (geo->mpos & 0x040404) { - geo->pln_mode = NVM_PLANE_QUAD; - geo->ws_opt <<= 2; - } - - geo->num_pln = src->num_pln; - geo->num_pg = le16_to_cpu(src->num_pg); - geo->fpg_sz = le16_to_cpu(src->fpg_sz); - - nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf); - - return 0; -} - -static void nvme_nvm_set_addr_20(struct nvm_addrf *dst, - struct nvme_nvm_id20_addrf *src) -{ - dst->ch_len = src->grp_len; - dst->lun_len = src->pu_len; - dst->chk_len = src->chk_len; - dst->sec_len = src->lba_len; - - dst->sec_offset = 0; - dst->chk_offset = dst->sec_len; - dst->lun_offset = dst->chk_offset + dst->chk_len; - dst->ch_offset = dst->lun_offset + dst->lun_len; - - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset; - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; -} - -static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id, - struct nvm_geo *geo) -{ - geo->major_ver_id = id->mjr; - geo->minor_ver_id = id->mnr; - - /* Set compacted version for upper layers */ - geo->version = NVM_OCSSD_SPEC_20; - - geo->num_ch = le16_to_cpu(id->num_grp); - geo->num_lun = le16_to_cpu(id->num_pu); - geo->all_luns = geo->num_ch * geo->num_lun; - - geo->num_chk = le32_to_cpu(id->num_chk); - geo->clba = le32_to_cpu(id->clba); - - geo->all_chunks = geo->all_luns * geo->num_chk; - geo->total_secs = geo->clba * geo->all_chunks; - - geo->ws_min = le32_to_cpu(id->ws_min); - geo->ws_opt = le32_to_cpu(id->ws_opt); - geo->mw_cunits = le32_to_cpu(id->mw_cunits); - geo->maxoc = le32_to_cpu(id->maxoc); - geo->maxocpu = le32_to_cpu(id->maxocpu); - - geo->trdt = le32_to_cpu(id->trdt); - geo->trdm = 
le32_to_cpu(id->trdm); - geo->tprt = le32_to_cpu(id->twrt); - geo->tprm = le32_to_cpu(id->twrm); - geo->tbet = le32_to_cpu(id->tcrst); - geo->tbem = le32_to_cpu(id->tcrsm); - - nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf); - - return 0; -} - -static int nvme_nvm_identity(struct nvm_dev *nvmdev) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_id12 *id; - struct nvme_nvm_command c = {}; - int ret; - - c.identity.opcode = nvme_nvm_admin_identity; - c.identity.nsid = cpu_to_le32(ns->head->ns_id); - - id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL); - if (!id) - return -ENOMEM; - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - id, sizeof(struct nvme_nvm_id12)); - if (ret) { - ret = -EIO; - goto out; - } - - /* - * The 1.2 and 2.0 specifications share the first byte in their geometry - * command to make it possible to know what version a device implements. - */ - switch (id->ver_id) { - case 1: - ret = nvme_nvm_setup_12(id, &nvmdev->geo); - break; - case 2: - ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id, - &nvmdev->geo); - break; - default: - dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n", - id->ver_id); - ret = -EINVAL; - } - -out: - kfree(id); - return ret; -} - -static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, - u8 *blks) -{ - struct request_queue *q = nvmdev->q; - struct nvm_geo *geo = &nvmdev->geo; - struct nvme_ns *ns = q->queuedata; - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_command c = {}; - struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->num_chk * geo->num_pln; - int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; - int ret = 0; - - c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; - c.get_bb.nsid = cpu_to_le32(ns->head->ns_id); - c.get_bb.spba = cpu_to_le64(ppa.ppa); - - bb_tbl = kzalloc(tblsz, GFP_KERNEL); - if (!bb_tbl) - return -ENOMEM; - - ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c, - bb_tbl, tblsz); - if (ret) { - dev_err(ctrl->device, "get bad block table failed (%d)\n", ret); - ret = -EIO; - goto out; - } - - if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || - bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { - dev_err(ctrl->device, "bbt format mismatch\n"); - ret = -EINVAL; - goto out; - } - - if (le16_to_cpu(bb_tbl->verid) != 1) { - ret = -EINVAL; - dev_err(ctrl->device, "bbt version not supported\n"); - goto out; - } - - if (le32_to_cpu(bb_tbl->tblks) != nr_blks) { - ret = -EINVAL; - dev_err(ctrl->device, - "bbt unexpected blocks returned (%u!=%u)", - le32_to_cpu(bb_tbl->tblks), nr_blks); - goto out; - } - - memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln); -out: - kfree(bb_tbl); - return ret; -} - -static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_command c = {}; - int ret = 0; - - c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl; - c.set_bb.nsid = cpu_to_le32(ns->head->ns_id); - c.set_bb.spba = cpu_to_le64(ppas->ppa); - c.set_bb.nlb = cpu_to_le16(nr_ppas - 1); - c.set_bb.value = type; - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - NULL, 0); - if (ret) - dev_err(ns->ctrl->device, "set bad block table failed (%d)\n", - ret); - return ret; -} - -/* - * Expect the lba in device format - */ -static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - sector_t slba, int nchks, - struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &ndev->geo; - struct nvme_ns *ns =
ndev->q->queuedata; - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; - struct ppa_addr ppa; - size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); - size_t log_pos, offset, len; - int i, max_len; - int ret = 0; - - /* - * limit requests to a maximum of 256K to avoid issuing arbitrarily large - * requests when the device does not specify a maximum transfer size. - */ - max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); - - dev_meta = kmalloc(max_len, GFP_KERNEL); - if (!dev_meta) - return -ENOMEM; - - /* Normalize lba address space to obtain log offset */ - ppa.ppa = slba; - ppa = dev_to_generic_addr(ndev, ppa); - - log_pos = ppa.m.chk; - log_pos += ppa.m.pu * geo->num_chk; - log_pos += ppa.m.grp * geo->num_lun * geo->num_chk; - - offset = log_pos * sizeof(struct nvme_nvm_chk_meta); - - while (left) { - len = min_t(unsigned int, left, max_len); - - memset(dev_meta, 0, max_len); - dev_meta_off = dev_meta; - - ret = nvme_get_log(ctrl, ns->head->ns_id, - NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM, - dev_meta, len, offset); - if (ret) { - dev_err(ctrl->device, "Get REPORT CHUNK log error\n"); - break; - } - - for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta_off->state; - meta->type = dev_meta_off->type; - meta->wi = dev_meta_off->wi; - meta->slba = le64_to_cpu(dev_meta_off->slba); - meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); - meta->wp = le64_to_cpu(dev_meta_off->wp); - - meta++; - dev_meta_off++; - } - - offset += len; - left -= len; - } - - kfree(dev_meta); - - return ret; -} - -static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, - struct nvme_nvm_command *c) -{ - c->ph_rw.opcode = rqd->opcode; - c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id); - c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); - c->ph_rw.control = cpu_to_le16(rqd->flags); - c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); -} - -static void nvme_nvm_end_io(struct request *rq, blk_status_t status) -{ - struct nvm_rq *rqd = rq->end_io_data; - - rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); - rqd->error = nvme_req(rq)->status; - nvm_end_io(rqd); - - kfree(nvme_req(rq)->cmd); - blk_mq_free_request(rq); -} - -static struct request *nvme_nvm_alloc_request(struct request_queue *q, - struct nvm_rq *rqd, - struct nvme_nvm_command *cmd) -{ - struct nvme_ns *ns = q->queuedata; - struct request *rq; - - nvme_nvm_rqtocmd(rqd, ns, cmd); - - rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0); - if (IS_ERR(rq)) - return rq; - - rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - - if (rqd->bio) - blk_rq_append_bio(rq, rqd->bio); - else - rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); - - return rq; -} - -static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd, - void *buf) -{ - struct nvm_geo *geo = &dev->geo; - struct request_queue *q = dev->q; - struct nvme_nvm_command *cmd; - struct request *rq; - int ret; - - cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - rq = nvme_nvm_alloc_request(q, rqd, cmd); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto err_free_cmd; - } - - if (buf) { - ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas, - GFP_KERNEL); - if (ret) - goto err_free_cmd; - } - - rq->end_io_data = rqd; - - blk_execute_rq_nowait(NULL, rq, 0, nvme_nvm_end_io); - - return 0; - -err_free_cmd: - kfree(cmd); - return ret; -} - -static void *nvme_nvm_create_dma_pool(struct nvm_dev
*nvmdev, char *name, - int size) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - - return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0); -} - -static void nvme_nvm_destroy_dma_pool(void *pool) -{ - struct dma_pool *dma_pool = pool; - - dma_pool_destroy(dma_pool); -} - -static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, - gfp_t mem_flags, dma_addr_t *dma_handler) -{ - return dma_pool_alloc(pool, mem_flags, dma_handler); -} - -static void nvme_nvm_dev_dma_free(void *pool, void *addr, - dma_addr_t dma_handler) -{ - dma_pool_free(pool, addr, dma_handler); -} - -static struct nvm_dev_ops nvme_nvm_dev_ops = { - .identity = nvme_nvm_identity, - - .get_bb_tbl = nvme_nvm_get_bb_tbl, - .set_bb_tbl = nvme_nvm_set_bb_tbl, - - .get_chk_meta = nvme_nvm_get_chk_meta, - - .submit_io = nvme_nvm_submit_io, - - .create_dma_pool = nvme_nvm_create_dma_pool, - .destroy_dma_pool = nvme_nvm_destroy_dma_pool, - .dev_dma_alloc = nvme_nvm_dev_dma_alloc, - .dev_dma_free = nvme_nvm_dev_dma_free, -}; - -static int nvme_nvm_submit_user_cmd(struct request_queue *q, - struct nvme_ns *ns, - struct nvme_nvm_command *vcmd, - void __user *ubuf, unsigned int bufflen, - void __user *meta_buf, unsigned int meta_len, - void __user *ppa_buf, unsigned int ppa_len, - u32 *result, u64 *status, unsigned int timeout) -{ - bool write = nvme_is_write((struct nvme_command *)vcmd); - struct nvm_dev *dev = ns->ndev; - struct request *rq; - struct bio *bio = NULL; - __le64 *ppa_list = NULL; - dma_addr_t ppa_dma; - __le64 *metadata = NULL; - dma_addr_t metadata_dma; - DECLARE_COMPLETION_ONSTACK(wait); - int ret = 0; - - rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0); - if (IS_ERR(rq)) { - ret = -ENOMEM; - goto err_cmd; - } - - if (timeout) - rq->timeout = timeout; - - if (ppa_buf && ppa_len) { - ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); - if (!ppa_list) { - ret = -ENOMEM; - goto err_rq; - } - if (copy_from_user(ppa_list, (void __user *)ppa_buf, - sizeof(u64) * (ppa_len + 1))) { - ret = -EFAULT; - goto err_ppa; - } - vcmd->ph_rw.spba = cpu_to_le64(ppa_dma); - } else { - vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf); - } - - if (ubuf && bufflen) { - ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL); - if (ret) - goto err_ppa; - bio = rq->bio; - - if (meta_buf && meta_len) { - metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, - &metadata_dma); - if (!metadata) { - ret = -ENOMEM; - goto err_map; - } - - if (write) { - if (copy_from_user(metadata, - (void __user *)meta_buf, - meta_len)) { - ret = -EFAULT; - goto err_meta; - } - } - vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); - } - - bio_set_dev(bio, ns->disk->part0); - } - - blk_execute_rq(NULL, rq, 0); - - if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) - ret = -EINTR; - else if (nvme_req(rq)->status & 0x7ff) - ret = -EIO; - if (result) - *result = nvme_req(rq)->status & 0x7ff; - if (status) - *status = le64_to_cpu(nvme_req(rq)->result.u64); - - if (metadata && !ret && !write) { - if (copy_to_user(meta_buf, (void *)metadata, meta_len)) - ret = -EFAULT; - } -err_meta: - if (meta_buf && meta_len) - dma_pool_free(dev->dma_pool, metadata, metadata_dma); -err_map: - if (bio) - blk_rq_unmap_user(bio); -err_ppa: - if (ppa_buf && ppa_len) - dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); -err_rq: - blk_mq_free_request(rq); -err_cmd: - return ret; -} - -static int nvme_nvm_submit_vio(struct nvme_ns *ns, - struct nvm_user_vio __user *uvio) -{ - struct nvm_user_vio vio; - struct nvme_nvm_command c; - unsigned int 
length; - int ret; - - if (copy_from_user(&vio, uvio, sizeof(vio))) - return -EFAULT; - if (vio.flags) - return -EINVAL; - - memset(&c, 0, sizeof(c)); - c.ph_rw.opcode = vio.opcode; - c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id); - c.ph_rw.control = cpu_to_le16(vio.control); - c.ph_rw.length = cpu_to_le16(vio.nppas); - - length = (vio.nppas + 1) << ns->lba_shift; - - ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c, - (void __user *)(uintptr_t)vio.addr, length, - (void __user *)(uintptr_t)vio.metadata, - vio.metadata_len, - (void __user *)(uintptr_t)vio.ppa_list, vio.nppas, - &vio.result, &vio.status, 0); - - if (ret && copy_to_user(uvio, &vio, sizeof(vio))) - return -EFAULT; - - return ret; -} - -static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, - struct nvm_passthru_vio __user *uvcmd) -{ - struct nvm_passthru_vio vcmd; - struct nvme_nvm_command c; - struct request_queue *q; - unsigned int timeout = 0; - int ret; - - if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd))) - return -EFAULT; - if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN))) - return -EACCES; - if (vcmd.flags) - return -EINVAL; - - memset(&c, 0, sizeof(c)); - c.common.opcode = vcmd.opcode; - c.common.nsid = cpu_to_le32(ns->head->ns_id); - c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); - c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); - /* cdw11-12 */ - c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = cpu_to_le16(vcmd.control); - c.common.cdw13 = cpu_to_le32(vcmd.cdw13); - c.common.cdw14 = cpu_to_le32(vcmd.cdw14); - c.common.cdw15 = cpu_to_le32(vcmd.cdw15); - - if (vcmd.timeout_ms) - timeout = msecs_to_jiffies(vcmd.timeout_ms); - - q = admin ? ns->ctrl->admin_q : ns->queue; - - ret = nvme_nvm_submit_user_cmd(q, ns, - (struct nvme_nvm_command *)&c, - (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len, - (void __user *)(uintptr_t)vcmd.metadata, - vcmd.metadata_len, - (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas, - &vcmd.result, &vcmd.status, timeout); - - if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd))) - return -EFAULT; - - return ret; -} - -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp) -{ - switch (cmd) { - case NVME_NVM_IOCTL_ADMIN_VIO: - return nvme_nvm_user_vcmd(ns, 1, argp); - case NVME_NVM_IOCTL_IO_VIO: - return nvme_nvm_user_vcmd(ns, 0, argp); - case NVME_NVM_IOCTL_SUBMIT_VIO: - return nvme_nvm_submit_vio(ns, argp); - default: - return -ENOTTY; - } -} - -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) -{ - struct request_queue *q = ns->queue; - struct nvm_dev *dev; - struct nvm_geo *geo; - - _nvme_nvm_check_size(); - - dev = nvm_alloc_dev(node); - if (!dev) - return -ENOMEM; - - /* Note that csecs and sos will be overridden if it is a 1.2 drive. 
*/ - geo = &dev->geo; - geo->csecs = 1 << ns->lba_shift; - geo->sos = ns->ms; - if (ns->features & NVME_NS_EXT_LBAS) - geo->ext = true; - else - geo->ext = false; - geo->mdts = ns->ctrl->max_hw_sectors; - - dev->q = q; - memcpy(dev->name, disk_name, DISK_NAME_LEN); - dev->ops = &nvme_nvm_dev_ops; - dev->private_data = ns; - ns->ndev = dev; - - return nvm_register(dev); -} - -void nvme_nvm_unregister(struct nvme_ns *ns) -{ - nvm_unregister(ns->ndev); -} - -static ssize_t nvm_dev_attr_show(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "version") == 0) { - if (geo->major_ver_id == 1) - return scnprintf(page, PAGE_SIZE, "%u\n", - geo->major_ver_id); - else - return scnprintf(page, PAGE_SIZE, "%u.%u\n", - geo->major_ver_id, - geo->minor_ver_id); - } else if (strcmp(attr->name, "capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap); - } else if (strcmp(attr->name, "read_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt); - } else if (strcmp(attr->name, "read_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm); - } else { - return scnprintf(page, - PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page) -{ - return scnprintf(page, PAGE_SIZE, - "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - ppaf->ch_offset, ppaf->ch_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->pln_offset, ppaf->pln_len, - ppaf->blk_offset, ppaf->blk_len, - ppaf->pg_offset, ppaf->pg_len, - ppaf->sec_offset, ppaf->sec_len); -} - -static ssize_t nvm_dev_attr_show_12(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "vendor_opcode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt); - } else if (strcmp(attr->name, "device_mode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom); - /* kept for compatibility */ - } else if (strcmp(attr->name, "media_manager") == 0) { - return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); - } else if (strcmp(attr->name, "ppa_format") == 0) { - return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page); - } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype); - } else if (strcmp(attr->name, "flash_media_type") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype); - } else if (strcmp(attr->name, "num_channels") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); - } else if (strcmp(attr->name, "num_luns") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); - } else if (strcmp(attr->name, "num_planes") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln); - } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); - } else if (strcmp(attr->name, "num_pages") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg); - } else if (strcmp(attr->name, "page_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz); - } else if 
(strcmp(attr->name, "hw_sector_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs); - } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos); - } else if (strcmp(attr->name, "prog_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); - } else if (strcmp(attr->name, "prog_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); - } else if (strcmp(attr->name, "erase_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); - } else if (strcmp(attr->name, "erase_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); - } else if (strcmp(attr->name, "multiplane_modes") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos); - } else if (strcmp(attr->name, "media_capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap); - } else if (strcmp(attr->name, "max_phys_secs") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA); - } else { - return scnprintf(page, PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -static ssize_t nvm_dev_attr_show_20(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "groups") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); - } else if (strcmp(attr->name, "punits") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); - } else if (strcmp(attr->name, "chunks") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); - } else if (strcmp(attr->name, "clba") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba); - } else if (strcmp(attr->name, "ws_min") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min); - } else if (strcmp(attr->name, "ws_opt") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt); - } else if (strcmp(attr->name, "maxoc") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc); - } else if (strcmp(attr->name, "maxocpu") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu); - } else if (strcmp(attr->name, "mw_cunits") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits); - } else if (strcmp(attr->name, "write_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); - } else if (strcmp(attr->name, "write_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); - } else if (strcmp(attr->name, "reset_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); - } else if (strcmp(attr->name, "reset_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); - } else { - return scnprintf(page, PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -#define NVM_DEV_ATTR_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) -#define NVM_DEV_ATTR_12_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL) -#define NVM_DEV_ATTR_20_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL) - -/* general attributes */ -static NVM_DEV_ATTR_RO(version); -static NVM_DEV_ATTR_RO(capabilities); - -static NVM_DEV_ATTR_RO(read_typ); -static NVM_DEV_ATTR_RO(read_max); - -/* 1.2 values */ -static NVM_DEV_ATTR_12_RO(vendor_opcode); -static NVM_DEV_ATTR_12_RO(device_mode); -static 
-static NVM_DEV_ATTR_12_RO(ppa_format);
-static NVM_DEV_ATTR_12_RO(media_manager);
-static NVM_DEV_ATTR_12_RO(media_type);
-static NVM_DEV_ATTR_12_RO(flash_media_type);
-static NVM_DEV_ATTR_12_RO(num_channels);
-static NVM_DEV_ATTR_12_RO(num_luns);
-static NVM_DEV_ATTR_12_RO(num_planes);
-static NVM_DEV_ATTR_12_RO(num_blocks);
-static NVM_DEV_ATTR_12_RO(num_pages);
-static NVM_DEV_ATTR_12_RO(page_size);
-static NVM_DEV_ATTR_12_RO(hw_sector_size);
-static NVM_DEV_ATTR_12_RO(oob_sector_size);
-static NVM_DEV_ATTR_12_RO(prog_typ);
-static NVM_DEV_ATTR_12_RO(prog_max);
-static NVM_DEV_ATTR_12_RO(erase_typ);
-static NVM_DEV_ATTR_12_RO(erase_max);
-static NVM_DEV_ATTR_12_RO(multiplane_modes);
-static NVM_DEV_ATTR_12_RO(media_capabilities);
-static NVM_DEV_ATTR_12_RO(max_phys_secs);
-
-/* 2.0 values */
-static NVM_DEV_ATTR_20_RO(groups);
-static NVM_DEV_ATTR_20_RO(punits);
-static NVM_DEV_ATTR_20_RO(chunks);
-static NVM_DEV_ATTR_20_RO(clba);
-static NVM_DEV_ATTR_20_RO(ws_min);
-static NVM_DEV_ATTR_20_RO(ws_opt);
-static NVM_DEV_ATTR_20_RO(maxoc);
-static NVM_DEV_ATTR_20_RO(maxocpu);
-static NVM_DEV_ATTR_20_RO(mw_cunits);
-static NVM_DEV_ATTR_20_RO(write_typ);
-static NVM_DEV_ATTR_20_RO(write_max);
-static NVM_DEV_ATTR_20_RO(reset_typ);
-static NVM_DEV_ATTR_20_RO(reset_max);
-
-static struct attribute *nvm_dev_attrs[] = {
-	/* version agnostic attrs */
-	&dev_attr_version.attr,
-	&dev_attr_capabilities.attr,
-	&dev_attr_read_typ.attr,
-	&dev_attr_read_max.attr,
-
-	/* 1.2 attrs */
-	&dev_attr_vendor_opcode.attr,
-	&dev_attr_device_mode.attr,
-	&dev_attr_media_manager.attr,
-	&dev_attr_ppa_format.attr,
-	&dev_attr_media_type.attr,
-	&dev_attr_flash_media_type.attr,
-	&dev_attr_num_channels.attr,
-	&dev_attr_num_luns.attr,
-	&dev_attr_num_planes.attr,
-	&dev_attr_num_blocks.attr,
-	&dev_attr_num_pages.attr,
-	&dev_attr_page_size.attr,
-	&dev_attr_hw_sector_size.attr,
-	&dev_attr_oob_sector_size.attr,
-	&dev_attr_prog_typ.attr,
-	&dev_attr_prog_max.attr,
-	&dev_attr_erase_typ.attr,
-	&dev_attr_erase_max.attr,
-	&dev_attr_multiplane_modes.attr,
-	&dev_attr_media_capabilities.attr,
-	&dev_attr_max_phys_secs.attr,
-
-	/* 2.0 attrs */
-	&dev_attr_groups.attr,
-	&dev_attr_punits.attr,
-	&dev_attr_chunks.attr,
-	&dev_attr_clba.attr,
-	&dev_attr_ws_min.attr,
-	&dev_attr_ws_opt.attr,
-	&dev_attr_maxoc.attr,
-	&dev_attr_maxocpu.attr,
-	&dev_attr_mw_cunits.attr,
-
-	&dev_attr_write_typ.attr,
-	&dev_attr_write_max.attr,
-	&dev_attr_reset_typ.attr,
-	&dev_attr_reset_max.attr,
-
-	NULL,
-};
-
-static umode_t nvm_dev_attrs_visible(struct kobject *kobj,
-				     struct attribute *attr, int index)
-{
-	struct device *dev = kobj_to_dev(kobj);
-	struct gendisk *disk = dev_to_disk(dev);
-	struct nvme_ns *ns = disk->private_data;
-	struct nvm_dev *ndev = ns->ndev;
-	struct device_attribute *dev_attr =
-		container_of(attr, typeof(*dev_attr), attr);
-
-	if (!ndev)
-		return 0;
-
-	if (dev_attr->show == nvm_dev_attr_show)
-		return attr->mode;
-
-	switch (ndev->geo.major_ver_id) {
-	case 1:
-		if (dev_attr->show == nvm_dev_attr_show_12)
-			return attr->mode;
-		break;
-	case 2:
-		if (dev_attr->show == nvm_dev_attr_show_20)
-			return attr->mode;
-		break;
-	}
-
-	return 0;
-}
-
-const struct attribute_group nvme_nvm_attr_group = {
-	.name		= "lightnvm",
-	.attrs		= nvm_dev_attrs,
-	.is_visible	= nvm_dev_attrs_visible,
-};
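The "lightnvm" sysfs group removed above relies on the attribute_group .is_visible idiom: one flat attribute array, with a callback that returns 0 to hide an entry or its mode to expose it, here keyed off the device's reported spec version. A standalone C sketch of that dispatch (plain userspace code with hypothetical names, not the kernel API):

#include <stdio.h>

struct demo_attr {
	const char *name;
	unsigned int mode;
	int spec_ver;	/* 0 = version-agnostic, 1 = 1.2-only, 2 = 2.0-only */
};

/* mirrors nvm_dev_attrs_visible(): expose attributes matching the device */
static unsigned int demo_visible(int dev_ver, const struct demo_attr *a)
{
	if (a->spec_ver && a->spec_ver != dev_ver)
		return 0;	/* hidden, like returning 0 from .is_visible */
	return a->mode;
}

int main(void)
{
	const struct demo_attr attrs[] = {
		{ "version",      0444, 0 },
		{ "num_channels", 0444, 1 },
		{ "groups",       0444, 2 },
	};
	int dev_ver = 2;	/* pretend the device reports spec 2.0 */

	for (unsigned int i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
		printf("%-12s mode %o\n", attrs[i].name,
		       demo_visible(dev_ver, &attrs[i]));
	return 0;
}

The same idiom reappears below in the new nvme_pci_attr_group, which hides the CMB and HMB attributes on controllers that lack those features.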
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5cd1fa3b8464..a2e1f298b217 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -11,7 +11,6 @@
 #include <linux/pci.h>
 #include <linux/kref.h>
 #include <linux/blk-mq.h>
-#include <linux/lightnvm.h>
 #include <linux/sed-opal.h>
 #include <linux/fault-inject.h>
 #include <linux/rcupdate.h>
@@ -48,11 +47,6 @@ extern struct workqueue_struct *nvme_wq;
 extern struct workqueue_struct *nvme_reset_wq;
 extern struct workqueue_struct *nvme_delete_wq;
 
-enum {
-	NVME_NS_LBA		= 0,
-	NVME_NS_LIGHTNVM	= 1,
-};
-
 /*
  * List of workarounds for devices that required behavior not specified in
  * the standard.
@@ -93,11 +87,6 @@ enum nvme_quirks {
 	NVME_QUIRK_NO_DEEPEST_PS		= (1 << 5),
 
 	/*
-	 * Supports the LighNVM command set if indicated in vs[1].
-	 */
-	NVME_QUIRK_LIGHTNVM			= (1 << 6),
-
-	/*
 	 * Set MEDIUM priority on SQ creation
 	 */
 	NVME_QUIRK_MEDIUM_PRIO_SQ		= (1 << 7),
@@ -158,6 +147,7 @@ enum nvme_quirks {
 struct nvme_request {
 	struct nvme_command	*cmd;
 	union nvme_result	result;
+	u8			genctr;
 	u8			retries;
 	u8			flags;
 	u16			status;
@@ -449,7 +439,6 @@ struct nvme_ns {
 	u32 ana_grpid;
 #endif
 	struct list_head siblings;
-	struct nvm_dev *ndev;
 	struct kref kref;
 	struct nvme_ns_head *head;
 
@@ -497,6 +486,49 @@ struct nvme_ctrl_ops {
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
+/*
+ * nvme command_id is constructed as such:
+ * | xxxx | xxxxxxxxxxxx |
+ *   gen    request tag
+ */
+#define nvme_genctr_mask(gen)			(gen & 0xf)
+#define nvme_cid_install_genctr(gen)		(nvme_genctr_mask(gen) << 12)
+#define nvme_genctr_from_cid(cid)		((cid & 0xf000) >> 12)
+#define nvme_tag_from_cid(cid)			(cid & 0xfff)
+
+static inline u16 nvme_cid(struct request *rq)
+{
+	return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
+}
+
+static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
+		u16 command_id)
+{
+	u8 genctr = nvme_genctr_from_cid(command_id);
+	u16 tag = nvme_tag_from_cid(command_id);
+	struct request *rq;
+
+	rq = blk_mq_tag_to_rq(tags, tag);
+	if (unlikely(!rq)) {
+		pr_err("could not locate request for tag %#x\n",
+			tag);
+		return NULL;
+	}
+	if (unlikely(nvme_genctr_mask(nvme_req(rq)->genctr) != genctr)) {
+		dev_err(nvme_req(rq)->ctrl->device,
+			"request %#x genctr mismatch (got %#x expected %#x)\n",
+			tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
+		return NULL;
+	}
+	return rq;
+}
+
+static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags,
+		u16 command_id)
+{
+	return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
+}
+
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
 void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
 			    const char *dev_name);
@@ -594,7 +626,8 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
 
 static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
 {
-	return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+	return !qid &&
+		nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
 }
 
 void nvme_complete_rq(struct request *req);
@@ -823,26 +856,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 }
 #endif
 
-#ifdef CONFIG_NVM
-int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
-void nvme_nvm_unregister(struct nvme_ns *ns);
-extern const struct attribute_group nvme_nvm_attr_group;
-int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp);
-#else
-static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
-				    int node)
-{
-	return 0;
-}
-
-static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
-static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
-		void __user *argp)
-{
-	return -ENOTTY;
-}
-#endif /* CONFIG_NVM */
-
 static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 {
 	return dev_to_disk(dev)->private_data;
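The helpers added to nvme.h above split the 16-bit command_id into a 12-bit blk-mq tag plus a 4-bit generation counter; this is how the "handle double completions more gracefully" change lets transports reject a completion that carries a stale generation instead of completing a recycled tag twice. A standalone C sketch of the same bit layout (hypothetical DEMO_* names, not the kernel macros):

#include <stdio.h>
#include <stdint.h>

/* same layout as nvme_cid()/nvme_find_rq(): | 4-bit gen | 12-bit tag | */
#define DEMO_GEN_MASK(g)	((g) & 0xf)
#define DEMO_CID(g, tag)	((uint16_t)((DEMO_GEN_MASK(g) << 12) | ((tag) & 0xfff)))
#define DEMO_CID_GEN(cid)	(((cid) & 0xf000) >> 12)
#define DEMO_CID_TAG(cid)	((cid) & 0xfff)

int main(void)
{
	uint8_t genctr = 3;			/* per-request generation counter */
	uint16_t cid = DEMO_CID(genctr, 0x2a);	/* command_id put on the wire */

	genctr++;	/* the tag was recycled for a newer command */

	/* a late completion still carrying the old cid no longer matches */
	if (DEMO_CID_GEN(cid) != DEMO_GEN_MASK(genctr))
		printf("stale completion for tag %#x rejected\n",
		       DEMO_CID_TAG(cid));
	return 0;
}

Note the asymmetry in the kernel helpers: nvme_find_rq() enforces the generation check, while nvme_cid_to_rq() only strips the tag for paths (such as TCP data placement) where the request is known to be valid.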
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 51852085239e..b82492cd7503 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -60,6 +60,8 @@ MODULE_PARM_DESC(sgl_threshold,
 		"Use SGLs when average request segment size is larger or equal to "
 		"this size. Use 0 to disable SGLs.");
 
+#define NVME_PCI_MIN_QUEUE_SIZE 2
+#define NVME_PCI_MAX_QUEUE_SIZE 4095
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
 static const struct kernel_param_ops io_queue_depth_ops = {
 	.set = io_queue_depth_set,
@@ -68,7 +70,7 @@ static const struct kernel_param_ops io_queue_depth_ops = {
 
 static unsigned int io_queue_depth = 1024;
 module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
-MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
 
 static int io_queue_count_set(const char *val, const struct kernel_param *kp)
 {
@@ -135,6 +137,7 @@ struct nvme_dev {
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
 	u32 last_ps;
+	bool hmb;
 
 	mempool_t *iod_mempool;
 
@@ -153,18 +156,14 @@ struct nvme_dev {
 	unsigned int nr_allocated_queues;
 	unsigned int nr_write_queues;
 	unsigned int nr_poll_queues;
+
+	bool attrs_added;
 };
 
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 {
-	int ret;
-	u32 n;
-
-	ret = kstrtou32(val, 10, &n);
-	if (ret != 0 || n < 2)
-		return -EINVAL;
-
-	return param_set_uint(val, kp);
+	return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
+			NVME_PCI_MAX_QUEUE_SIZE);
 }
 
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -1014,7 +1013,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 		return;
 	}
 
-	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id);
+	req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
 	if (unlikely(!req)) {
 		dev_warn(nvmeq->dev->ctrl.device,
 			"invalid id %d completed on queue %d\n",
@@ -1808,17 +1807,6 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	return ret >= 0 ? 0 : ret;
 }
 
-static ssize_t nvme_cmb_show(struct device *dev,
-			     struct device_attribute *attr,
-			     char *buf)
-{
-	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
-
-	return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz  : x%08x\n",
-		       ndev->cmbloc, ndev->cmbsz);
-}
-static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
-
 static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
 {
 	u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
@@ -1887,20 +1875,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 	if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
 			(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
 		pci_p2pmem_publish(pdev, true);
-
-	if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
-				    &dev_attr_cmb.attr, NULL))
-		dev_warn(dev->ctrl.device,
-			 "failed to add sysfs attribute for CMB\n");
-}
-
-static inline void nvme_release_cmb(struct nvme_dev *dev)
-{
-	if (dev->cmb_size) {
-		sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
-					     &dev_attr_cmb.attr, NULL);
-		dev->cmb_size = 0;
-	}
 }
 
 static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -1923,7 +1897,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
 		dev_warn(dev->ctrl.device,
 			 "failed to set host mem (err %d, flags %#x).\n",
 			 ret, bits);
-	}
+	} else
+		dev->hmb = bits & NVME_HOST_MEM_ENABLE;
+
 	return ret;
 }
 
@@ -2080,6 +2056,102 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
 	return ret;
 }
 
+static ssize_t cmb_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "cmbloc : x%08x\ncmbsz  : x%08x\n",
+		       ndev->cmbloc, ndev->cmbsz);
+}
+static DEVICE_ATTR_RO(cmb);
+
+static ssize_t cmbloc_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%u\n", ndev->cmbloc);
+}
+static DEVICE_ATTR_RO(cmbloc);
+
+static ssize_t cmbsz_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%u\n", ndev->cmbsz);
+}
+static DEVICE_ATTR_RO(cmbsz);
+
+static ssize_t hmb_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+	return sysfs_emit(buf, "%d\n", ndev->hmb);
+}
+
+static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+	bool new;
+	int ret;
+
+	if (strtobool(buf, &new) < 0)
+		return -EINVAL;
+
+	if (new == ndev->hmb)
+		return count;
+
+	if (new) {
+		ret = nvme_setup_host_mem(ndev);
+	} else {
+		ret = nvme_set_host_mem(ndev, 0);
+		if (!ret)
+			nvme_free_host_mem(ndev);
+	}
+
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(hmb);
+
+static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	struct nvme_ctrl *ctrl =
+		dev_get_drvdata(container_of(kobj, struct device, kobj));
+	struct nvme_dev *dev = to_nvme_dev(ctrl);
+
+	if (a == &dev_attr_cmb.attr ||
+	    a == &dev_attr_cmbloc.attr ||
+	    a == &dev_attr_cmbsz.attr) {
+		if (!dev->cmbsz)
+			return 0;
+	}
+	if (a == &dev_attr_hmb.attr && !ctrl->hmpre)
+		return 0;
+
+	return a->mode;
+}
+
+static struct attribute *nvme_pci_attrs[] = {
+	&dev_attr_cmb.attr,
+	&dev_attr_cmbloc.attr,
+	&dev_attr_cmbsz.attr,
+	&dev_attr_hmb.attr,
+	NULL,
+};
+
+static const struct attribute_group nvme_pci_attr_group = {
+	.attrs		= nvme_pci_attrs,
+	.is_visible	= nvme_pci_attrs_are_visible,
+};
+
 /*
  * nirqs is the number of interrupts available for write and read
  * queues. The core already reserved an interrupt for the admin queue.
@@ -2751,6 +2823,10 @@ static void nvme_reset_work(struct work_struct *work)
 		goto out;
 	}
 
+	if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
+			&nvme_pci_attr_group))
+		dev->attrs_added = true;
+
 	nvme_start_ctrl(&dev->ctrl);
 	return;
 
@@ -2999,6 +3075,13 @@ static void nvme_shutdown(struct pci_dev *pdev)
 	nvme_disable_prepare_reset(dev, true);
 }
 
+static void nvme_remove_attrs(struct nvme_dev *dev)
+{
+	if (dev->attrs_added)
+		sysfs_remove_group(&dev->ctrl.device->kobj,
+				   &nvme_pci_attr_group);
+}
+
 /*
  * The driver's remove may be called on a device in a partially initialized
  * state. This function must not have any dependencies on the device state in
@@ -3020,7 +3103,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_stop_ctrl(&dev->ctrl);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
-	nvme_release_cmb(dev);
+	nvme_remove_attrs(dev);
 	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
@@ -3047,8 +3130,13 @@ static int nvme_resume(struct device *dev)
 
 	if (ndev->last_ps == U32_MAX ||
 	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
-		return nvme_try_sched_reset(&ndev->ctrl);
+		goto reset;
+	if (ctrl->hmpre && nvme_setup_host_mem(ndev))
+		goto reset;
+	return 0;
+reset:
+	return nvme_try_sched_reset(ctrl);
 }
 
 static int nvme_suspend(struct device *dev)
@@ -3072,15 +3160,9 @@ static int nvme_suspend(struct device *dev)
 	 * the PCI bus layer to put it into D3 in order to take the PCIe link
 	 * down, so as to allow the platform to achieve its minimum low-power
 	 * state (which may not be possible if the link is up).
-	 *
-	 * If a host memory buffer is enabled, shut down the device as the NVMe
-	 * specification allows the device to access the host memory buffer in
-	 * host DRAM from all power states, but hosts will fail access to DRAM
-	 * during S3.
	 */
 	if (pm_suspend_via_firmware() || !ctrl->npss ||
 	    !pcie_aspm_enabled(pdev) ||
-	    ndev->nr_host_mem_descs ||
 	    (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
 		return nvme_disable_prepare_reset(ndev, true);
@@ -3091,6 +3173,17 @@ static int nvme_suspend(struct device *dev)
 	if (ctrl->state != NVME_CTRL_LIVE)
 		goto unfreeze;
 
+	/*
+	 * Host memory access may not be successful in a system suspend state,
+	 * but the specification allows the controller to access memory in a
+	 * non-operational power state.
+	 */
+	if (ndev->hmb) {
+		ret = nvme_set_host_mem(ndev, 0);
+		if (ret < 0)
+			goto unfreeze;
+	}
+
 	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
 	if (ret < 0)
 		goto unfreeze;
@@ -3243,12 +3336,6 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_DEVICE(0x1b4b, 0x1092),	/* Lexar 256 GB SSD */
 		.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
 				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
-	{ PCI_DEVICE(0x1d1d, 0x1f1f),	/* LighNVM qemu device */
-		.driver_data = NVME_QUIRK_LIGHTNVM, },
-	{ PCI_DEVICE(0x1d1d, 0x2807),	/* CNEX WL */
-		.driver_data = NVME_QUIRK_LIGHTNVM, },
-	{ PCI_DEVICE(0x1d1d, 0x2601),	/* CNEX Granby */
-		.driver_data = NVME_QUIRK_LIGHTNVM, },
 	{ PCI_DEVICE(0x10ec, 0x5762),   /* ADATA SX6000LNP */
 		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE(0x1cc1, 0x8201),   /* ADATA SX8200PNP 512GB */
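With the pci.c hunks above, io_queue_depth validation is delegated to param_set_uint_minmax() and the [2, 4095] window, replacing the open-coded kstrtou32() plus range test. A standalone C sketch of the same parse-then-bound pattern (demo_set_uint_minmax is a hypothetical stand-in, not the kernel helper):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int demo_set_uint_minmax(const char *val, unsigned int *out,
				unsigned int min, unsigned int max)
{
	char *end;
	unsigned long n;

	errno = 0;
	n = strtoul(val, &end, 10);
	/* reject junk, trailing characters, and out-of-range values */
	if (errno || end == val || *end != '\0' || n < min || n > max)
		return -EINVAL;
	*out = (unsigned int)n;
	return 0;
}

int main(void)
{
	unsigned int depth = 0;

	/* 2 and 4095 mirror NVME_PCI_MIN/MAX_QUEUE_SIZE above */
	printf("1024 -> %d\n", demo_set_uint_minmax("1024", &depth, 2, 4095));
	printf("4096 -> %d\n", demo_set_uint_minmax("4096", &depth, 2, 4095));
	return 0;
}

The 4095 upper bound matches the largest tail/head pointer a 12-bit doorbell stride can address; values of 4096 and above are now rejected at parse time rather than failing later in queue allocation.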
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 7f6b3a991501..a68704e39084 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -735,13 +735,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->ctrl.queue_count = nr_io_queues + 1;
-	if (ctrl->ctrl.queue_count < 2) {
+	if (nr_io_queues == 0) {
 		dev_err(ctrl->ctrl.device,
 			"unable to set any I/O queues\n");
 		return -ENOMEM;
 	}
 
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
@@ -1730,10 +1730,10 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	struct request *rq;
 	struct nvme_rdma_request *req;
 
-	rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
+	rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
-			"tag 0x%x on QP %#x not found\n",
+			"got bad command_id %#x on QP %#x\n",
 			cqe->command_id, queue->qp->qp_num);
 		nvme_rdma_error_recovery(queue->ctrl);
 		return;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8cb15ee5b249..645025620154 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -487,11 +487,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 {
 	struct request *rq;
 
-	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id);
+	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
-			"queue %d tag 0x%x not found\n",
-			nvme_tcp_queue_id(queue), cqe->command_id);
+			"got bad cqe.command_id %#x on queue %d\n",
+			cqe->command_id, nvme_tcp_queue_id(queue));
 		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
 		return -EINVAL;
 	}
@@ -508,11 +508,11 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
 {
 	struct request *rq;
 
-	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
-			"queue %d tag %#x not found\n",
-			nvme_tcp_queue_id(queue), pdu->command_id);
+			"got bad c2hdata.command_id %#x on queue %d\n",
+			pdu->command_id, nvme_tcp_queue_id(queue));
 		return -ENOENT;
 	}
 
@@ -606,7 +606,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
 	data->hdr.plen =
 		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
 	data->ttag = pdu->ttag;
-	data->command_id = rq->tag;
+	data->command_id = nvme_cid(rq);
 	data->data_offset = cpu_to_le32(req->data_sent);
 	data->data_length = cpu_to_le32(req->pdu_len);
 	return 0;
@@ -619,11 +619,11 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 	struct request *rq;
 	int ret;
 
-	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
-			"queue %d tag %#x not found\n",
-			nvme_tcp_queue_id(queue), pdu->command_id);
+			"got bad r2t.command_id %#x on queue %d\n",
+			pdu->command_id, nvme_tcp_queue_id(queue));
 		return -ENOENT;
 	}
 	req = blk_mq_rq_to_pdu(rq);
@@ -702,17 +702,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		unsigned int *offset, size_t *len)
 {
 	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
-	struct nvme_tcp_request *req;
-	struct request *rq;
-
-	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
-	if (!rq) {
-		dev_err(queue->ctrl->ctrl.device,
-			"queue %d tag %#x not found\n",
-			nvme_tcp_queue_id(queue), pdu->command_id);
-		return -ENOENT;
-	}
-	req = blk_mq_rq_to_pdu(rq);
+	struct request *rq =
+		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
 	while (true) {
 		int recv_len, ret;
@@ -804,8 +796,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	}
 
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-		struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
-						pdu->command_id);
+		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+						pdu->command_id);
 
 		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
 		queue->nr_cqe++;
@@ -1228,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
 
 	sock_release(queue->sock);
 	kfree(queue->pdu);
+	mutex_destroy(&queue->send_mutex);
 	mutex_destroy(&queue->queue_lock);
 }
 
@@ -1533,6 +1526,7 @@ err_sock:
 	sock_release(queue->sock);
 	queue->sock = NULL;
 err_destroy_mutex:
+	mutex_destroy(&queue->send_mutex);
 	mutex_destroy(&queue->queue_lock);
 	return ret;
 }
@@ -1769,13 +1763,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->queue_count = nr_io_queues + 1;
-	if (ctrl->queue_count < 2) {
+	if (nr_io_queues == 0) {
 		dev_err(ctrl->device,
 			"unable to set any I/O queues\n");
 		return -ENOMEM;
 	}
 
+	ctrl->queue_count = nr_io_queues + 1;
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 6543015b6121..2a89c5aa0790 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -72,6 +72,20 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10)
 	return ret;
 }
 
+static const char *nvme_trace_admin_set_features(struct trace_seq *p,
+						 u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u8 fid = cdw10[0];
+	u8 sv = cdw10[3] & 0x8;
+	u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+	trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
 static const char *nvme_trace_admin_get_features(struct trace_seq *p,
 						 u8 *cdw10)
 {
@@ -80,7 +94,7 @@ static const char *nvme_trace_admin_get_features(struct trace_seq *p,
 	u8 sel = cdw10[1] & 0x7;
 	u32 cdw11 = get_unaligned_le32(cdw10 + 4);
 
-	trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+	trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
 	trace_seq_putc(p, 0);
 
 	return ret;
@@ -201,6 +215,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
 		return nvme_trace_create_cq(p, cdw10);
 	case nvme_admin_identify:
 		return nvme_trace_admin_identify(p, cdw10);
+	case nvme_admin_set_features:
+		return nvme_trace_admin_set_features(p, cdw10);
 	case nvme_admin_get_features:
 		return nvme_trace_admin_get_features(p, cdw10);
 	case nvme_admin_get_lba_status:
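The new Set Features trace helper pulls its fields straight out of the raw CDW10 bytes: the feature identifier from byte 0, the save bit from the top byte (masked with 0x8, matching the helper above), and CDW11 as the following little-endian dword. A standalone decode sketch in plain C; the sample bytes are invented for the demo (FID 0x0d would be Host Memory Buffer):

#include <stdio.h>
#include <stdint.h>

/* little-endian load, standing in for get_unaligned_le32() */
static uint32_t demo_get_le32(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
	/* dwords 10-11 of a hypothetical Set Features SQE, as raw bytes */
	uint8_t cdw10[8] = { 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 };

	printf("fid=0x%x, sv=0x%x, cdw11=0x%x\n",
	       cdw10[0], cdw10[3] & 0x8, demo_get_le32(cdw10 + 4));
	return 0;
}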
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 4be2ececbc45..973561c93888 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -31,7 +31,6 @@ config NVME_TARGET_PASSTHRU
 config NVME_TARGET_LOOP
 	tristate "NVMe loopback device support"
 	depends on NVME_TARGET
-	select NVME_CORE
 	select NVME_FABRICS
 	select SG_POOL
 	help
@@ -65,7 +64,6 @@ config NVME_TARGET_FC
 config NVME_TARGET_FCLOOP
 	tristate "NVMe over Fabrics FC Transport Loopback Test driver"
 	depends on NVME_TARGET
-	select NVME_CORE
 	select NVME_FABRICS
 	select SG_POOL
 	depends on NVME_FC
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ac7210a3ea1c..66d05eecc2a9 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -802,6 +802,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	 * controller teardown as a result of a keep-alive expiration.
 	 */
 	ctrl->reset_tbkas = true;
+	sq->ctrl->sqs[sq->qid] = NULL;
 	nvmet_ctrl_put(ctrl);
 	sq->ctrl = NULL; /* allows reusing the queue later */
 }
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 7d0f3523fdab..7d0454cee920 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -109,21 +109,38 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
 	u16 qid = le16_to_cpu(c->qid);
 	u16 sqsize = le16_to_cpu(c->sqsize);
 	struct nvmet_ctrl *old;
+	u16 mqes = NVME_CAP_MQES(ctrl->cap);
 	u16 ret;
 
-	old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
-	if (old) {
-		pr_warn("queue already connected!\n");
-		req->error_loc = offsetof(struct nvmf_connect_command, opcode);
-		return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
-	}
 	if (!sqsize) {
 		pr_warn("queue size zero!\n");
 		req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
 		ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 		goto err;
 	}
 
+	if (ctrl->sqs[qid] != NULL) {
+		pr_warn("qid %u has already been created\n", qid);
+		req->error_loc = offsetof(struct nvmf_connect_command, qid);
+		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
+	}
+
+	if (sqsize > mqes) {
+		pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n",
+				sqsize, mqes, ctrl->cntlid);
+		req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize);
+		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+	}
+
+	old = cmpxchg(&req->sq->ctrl, NULL, ctrl);
+	if (old) {
+		pr_warn("queue already connected!\n");
+		req->error_loc = offsetof(struct nvmf_connect_command, opcode);
+		return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
+	}
+
 	/* note: convert queue size from 0's-based value to 1's-based value */
 	nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
 	nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
@@ -138,6 +155,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
 		if (ret) {
 			pr_err("failed to install queue %d cntlid %d ret %x\n",
 				qid, ctrl->cntlid, ret);
+			ctrl->sqs[qid] = NULL;
 			goto err;
 		}
 	}
@@ -260,11 +278,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	}
 
 	status = nvmet_install_queue(ctrl, req);
-	if (status) {
-		/* pass back cntlid that had the issue of installing queue */
-		req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
+	if (status)
 		goto out_ctrl_put;
-	}
+
+	/* pass back cntlid for successful completion */
+	req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 	pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
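nvmet_install_queue() now bounds the connect sqsize by CAP.MQES before touching the queue: MQES is the low 16 bits of the controller's CAP register and, like sqsize in the connect command, a 0's-based count. A standalone C sketch of just that check (hypothetical DEMO_* names, not the nvmet code itself):

#include <stdio.h>
#include <stdint.h>

/* low 16 bits of CAP, matching NVME_CAP_MQES() in the hunk above */
#define DEMO_CAP_MQES(cap)	((uint16_t)((cap) & 0xffff))

/* returns 0 if the 0's-based sqsize fits the advertised maximum */
static int demo_check_sqsize(uint64_t cap, uint16_t sqsize)
{
	if (!sqsize)
		return -1;	/* "queue size zero!" */
	if (sqsize > DEMO_CAP_MQES(cap))
		return -1;	/* "sqsize ... is larger than MQES supported" */
	return 0;
}

int main(void)
{
	uint64_t cap = 0x3ff;	/* MQES = 1023, i.e. a 1024-entry queue */

	printf("sqsize 1023: %s\n",
	       demo_check_sqsize(cap, 1023) ? "reject" : "ok");
	printf("sqsize 1024: %s\n",
	       demo_check_sqsize(cap, 1024) ? "reject" : "ok");
	return 0;
}

Note the ordering in the real function: both the duplicate-qid and MQES checks run before the cmpxchg() that claims the queue, so a rejected connect no longer leaves a half-installed sq behind.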
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 3a17a7e26bbf..0285ccc7541f 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -107,10 +107,10 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
 	} else {
 		struct request *rq;
 
-		rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id);
+		rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id);
 		if (!rq) {
 			dev_err(queue->ctrl->ctrl.device,
-				"tag 0x%x on queue %d not found\n",
+				"got bad command_id %#x on queue %d\n",
 				cqe->command_id, nvme_loop_queue_idx(queue));
 			return;
 		}
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index 1373a3c67962..bff454d46255 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -27,7 +27,7 @@ static const char *nvmet_trace_admin_get_features(struct trace_seq *p,
 	u8 sel = cdw10[1] & 0x7;
 	u32 cdw11 = get_unaligned_le32(cdw10 + 4);
 
-	trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11);
+	trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11);
 	trace_seq_putc(p, 0);
 
 	return ret;
@@ -49,6 +49,20 @@ static const char *nvmet_trace_get_lba_status(struct trace_seq *p,
 	return ret;
 }
 
+static const char *nvmet_trace_admin_set_features(struct trace_seq *p,
+						 u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u8 fid = cdw10[0];
+	u8 sv = cdw10[3] & 0x8;
+	u32 cdw11 = get_unaligned_le32(cdw10 + 4);
+
+	trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
 static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10)
 {
 	const char *ret = trace_seq_buffer_ptr(p);
@@ -94,6 +108,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p,
 	switch (opcode) {
 	case nvme_admin_identify:
 		return nvmet_trace_admin_identify(p, cdw10);
+	case nvme_admin_set_features:
+		return nvmet_trace_admin_set_features(p, cdw10);
 	case nvme_admin_get_features:
 		return nvmet_trace_admin_get_features(p, cdw10);
 	case nvme_admin_get_lba_status:
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 17f8b7a45f21..46bc30fe85d2 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -115,14 +115,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
 	}
 
 	status = nvmet_req_find_ns(req);
-	if (status) {
-		status = NVME_SC_INTERNAL;
+	if (status)
 		goto done;
-	}
 
 	if (!bdev_is_zoned(req->ns->bdev)) {
 		req->error_loc = offsetof(struct nvme_identify, nsid);
-		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
 		goto done;
 	}