diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-26 00:12:50 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-26 00:12:50 +0200 |
commit | 6a398a3ef404e661999cd4c58974cbceb52762e3 (patch) | |
tree | c78ecf3d186beabcd9c1b14d37c871e97b72400f | |
parent | Merge branch 'for-4.2/core' of git://git.kernel.dk/linux-block (diff) | |
parent | mtip32xx: Fix accessing freed memory (diff) | |
download | linux-6a398a3ef404e661999cd4c58974cbceb52762e3.tar.xz linux-6a398a3ef404e661999cd4c58974cbceb52762e3.zip |
Merge branch 'for-4.2/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"This contains:
- a few race fixes for null_blk, from Akinobu Mita.
- a series of fixes for mtip32xx, from Asai Thambi and Selvan Mani at
Micron.
- NVMe:
* Fix for missing error return on allocation failure, from Axel
Lin.
* Code consolidation and cleanups from Christoph.
* Memory barrier addition, syncing queue count and queue
pointers. From Jon Derrick.
* Various fixes from Keith, an addition to support user
issue reset from sysfs or ioctl, and automatic namespace
rescan.
* Fix from Matias, avoiding losing some request flags when
marking the request failfast.
- small cleanups and sparse fixups for ps3vram. From Geert
Uytterhoeven and Geoff Lavand.
- s390/dasd dead code removal, from Jarod Wilson.
- a set of fixes and optimizations for loop, from Ming Lei.
- conversion to blkdev_reread_part() of loop, dasd, ndb. From Ming
Lei.
- updates to cciss. From Tomas Henzl"
* 'for-4.2/drivers' of git://git.kernel.dk/linux-block: (44 commits)
mtip32xx: Fix accessing freed memory
block: nvme-scsi: Catch kcalloc failure
NVMe: Fix IO for extended metadata formats
nvme: don't overwrite req->cmd_flags on sync cmd
mtip32xx: increase wait time for hba reset
mtip32xx: fix minor number
mtip32xx: remove unnecessary sleep in mtip_ftl_rebuild_poll()
mtip32xx: fix crash on surprise removal of the drive
mtip32xx: Abort I/O during secure erase operation
mtip32xx: fix incorrectly setting MTIP_DDF_SEC_LOCK_BIT
mtip32xx: remove unused variable 'port->allocated'
mtip32xx: fix rmmod issue
MAINTAINERS: Update ps3vram block driver
block/ps3vram: Remove obsolete reference to MTD
block/ps3vram: Fix sparse warnings
NVMe: Automatic namespace rescan
NVMe: Memory barrier before queue_count is incremented
NVMe: add sysfs and ioctl controller reset
null_blk: restart request processing on completion handler
null_blk: prevent timer handler running on a different CPU where started
...
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | drivers/block/loop.c | 82 | ||||
-rw-r--r-- | drivers/block/loop.h | 3 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 228 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.h | 10 | ||||
-rw-r--r-- | drivers/block/nbd.c | 2 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 14 | ||||
-rw-r--r-- | drivers/block/nvme-core.c | 812 | ||||
-rw-r--r-- | drivers/block/nvme-scsi.c | 1230 | ||||
-rw-r--r-- | drivers/block/ps3vram.c | 34 | ||||
-rw-r--r-- | drivers/s390/block/dasd_genhd.c | 19 | ||||
-rw-r--r-- | include/linux/nvme.h | 31 | ||||
-rw-r--r-- | include/uapi/linux/nvme.h | 5 |
13 files changed, 986 insertions, 1485 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index d42970b10a22..73cfd4b0f539 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7982,6 +7982,7 @@ F: sound/ppc/snd_ps3* PS3VRAM DRIVER M: Jim Paris <jim@jtan.com> +M: Geoff Levand <geoff@infradead.org> L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/block/ps3vram.c diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d7173cb1ea76..40580dc7f41c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; -static struct workqueue_struct *loop_wq; - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -476,6 +474,28 @@ static int loop_flush(struct loop_device *lo) return loop_switch(lo, NULL); } +static void loop_reread_partitions(struct loop_device *lo, + struct block_device *bdev) +{ + int rc; + + /* + * bd_mutex has been held already in release path, so don't + * acquire it if this function is called in such case. + * + * If the reread partition isn't from release path, lo_refcnt + * must be at least one and it can only become zero when the + * current holder is released. + */ + if (!atomic_read(&lo->lo_refcnt)) + rc = __blkdev_reread_part(bdev); + else + rc = blkdev_reread_part(bdev); + if (rc) + pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", + __func__, lo->lo_number, lo->lo_file_name, rc); +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -524,7 +544,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, fput(old_file); if (lo->lo_flags & LO_FLAGS_PARTSCAN) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); return 0; out_putf: @@ -725,6 +745,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) goto out_putf; + error = -ENOMEM; + lo->wq = alloc_workqueue("kloopd%d", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16, + lo->lo_number); + if (!lo->wq) + goto out_putf; error = 0; @@ -755,7 +781,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; if (lo->lo_flags & LO_FLAGS_PARTSCAN) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); /* Grab the block_device to prevent its destruction after we * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). @@ -827,7 +853,7 @@ static int loop_clr_fd(struct loop_device *lo) * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d * command to fail with EBUSY. */ - if (lo->lo_refcnt > 1) { + if (atomic_read(&lo->lo_refcnt) > 1) { lo->lo_flags |= LO_FLAGS_AUTOCLEAR; mutex_unlock(&lo->lo_ctl_mutex); return 0; @@ -836,6 +862,9 @@ static int loop_clr_fd(struct loop_device *lo) if (filp == NULL) return -EINVAL; + /* freeze request queue during the transition */ + blk_mq_freeze_queue(lo->lo_queue); + spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; lo->lo_backing_file = NULL; @@ -867,11 +896,15 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); + blk_mq_unfreeze_queue(lo->lo_queue); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - ioctl_by_bdev(bdev, BLKRRPART, 0); + loop_reread_partitions(lo, bdev); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; + destroy_workqueue(lo->wq); + lo->wq = NULL; mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. @@ -943,7 +976,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); + loop_reread_partitions(lo, lo->lo_device); } lo->lo_encrypt_key_size = info->lo_encrypt_key_size; @@ -1324,9 +1357,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode) goto out; } - mutex_lock(&lo->lo_ctl_mutex); - lo->lo_refcnt++; - mutex_unlock(&lo->lo_ctl_mutex); + atomic_inc(&lo->lo_refcnt); out: mutex_unlock(&loop_index_mutex); return err; @@ -1337,11 +1368,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo = disk->private_data; int err; - mutex_lock(&lo->lo_ctl_mutex); - - if (--lo->lo_refcnt) - goto out; + if (atomic_dec_return(&lo->lo_refcnt)) + return; + mutex_lock(&lo->lo_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1358,7 +1388,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode) loop_flush(lo); } -out: mutex_unlock(&lo->lo_ctl_mutex); } @@ -1425,9 +1454,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct loop_device *lo = cmd->rq->q->queuedata; blk_mq_start_request(bd->rq); + if (lo->lo_state != Lo_bound) + return -EIO; + if (cmd->rq->cmd_flags & REQ_WRITE) { struct loop_device *lo = cmd->rq->q->queuedata; bool need_sched = true; @@ -1441,9 +1474,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irq(&lo->lo_lock); if (need_sched) - queue_work(loop_wq, &lo->write_work); + queue_work(lo->wq, &lo->write_work); } else { - queue_work(loop_wq, &cmd->read_work); + queue_work(lo->wq, &cmd->read_work); } return BLK_MQ_RQ_QUEUE_OK; @@ -1455,9 +1488,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd) struct loop_device *lo = cmd->rq->q->queuedata; int ret = -EIO; - if (lo->lo_state != Lo_bound) - goto failed; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) goto failed; @@ -1594,6 +1624,7 @@ static int loop_add(struct loop_device **l, int i) disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_EXT_DEVT; mutex_init(&lo->lo_ctl_mutex); + atomic_set(&lo->lo_refcnt, 0); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; @@ -1711,7 +1742,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&lo->lo_ctl_mutex); break; } - if (lo->lo_refcnt > 0) { + if (atomic_read(&lo->lo_refcnt) > 0) { ret = -EBUSY; mutex_unlock(&lo->lo_ctl_mutex); break; @@ -1806,13 +1837,6 @@ static int __init loop_init(void) goto misc_out; } - loop_wq = alloc_workqueue("kloopd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0); - if (!loop_wq) { - err = -ENOMEM; - goto misc_out; - } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1850,8 +1874,6 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); - destroy_workqueue(loop_wq); - misc_deregister(&loop_misc); } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 301c27f8323f..25e8997ed246 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -28,7 +28,7 @@ struct loop_func_table; struct loop_device { int lo_number; - int lo_refcnt; + atomic_t lo_refcnt; loff_t lo_offset; loff_t lo_sizelimit; int lo_flags; @@ -54,6 +54,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; + struct workqueue_struct *wq; struct list_head write_cmd_head; struct work_struct write_work; bool write_started; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3bd7ca9853a8..4a2ef09e6704 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -163,12 +163,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) else dev_warn(&dd->pdev->dev, "%s: dd->queue is NULL\n", __func__); - if (dd->port) { - set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags); - wake_up_interruptible(&dd->port->svc_wait); - } else - dev_warn(&dd->pdev->dev, - "%s: dd->port is NULL\n", __func__); return true; /* device removed */ } @@ -269,8 +263,11 @@ static int mtip_hba_reset(struct driver_data *dd) /* Flush */ readl(dd->mmio + HOST_CTL); - /* Spin for up to 2 seconds, waiting for reset acknowledgement */ - timeout = jiffies + msecs_to_jiffies(2000); + /* + * Spin for up to 10 seconds waiting for reset acknowledgement. Spec + * is 1 sec but in LUN failure conditions, up to 10 secs are required + */ + timeout = jiffies + msecs_to_jiffies(10000); do { mdelay(10); if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) @@ -623,8 +620,7 @@ static void mtip_handle_tfe(struct driver_data *dd) set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_TAG_INTERNAL, port->allocated)) { + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); @@ -896,6 +892,10 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) /* Acknowledge the interrupt status on the port.*/ port_stat = readl(port->mmio + PORT_IRQ_STAT); + if (unlikely(port_stat == 0xFFFFFFFF)) { + mtip_check_surprise_removal(dd->pdev); + return IRQ_HANDLED; + } writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ @@ -991,15 +991,10 @@ static bool mtip_pause_ncq(struct mtip_port *port, reply = port->rxfis + RX_FIS_D2H_REG; task_file_data = readl(port->mmio+PORT_TFDATA); - if (fis->command == ATA_CMD_SEC_ERASE_UNIT) - clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); - if ((task_file_data & 1)) return false; if (fis->command == ATA_CMD_SEC_ERASE_PREP) { - set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); - set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); port->ic_pause_timer = jiffies; return true; } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && @@ -1011,8 +1006,10 @@ static bool mtip_pause_ncq(struct mtip_port *port, ((fis->command == 0xFC) && (fis->features == 0x27 || fis->features == 0x72 || fis->features == 0x62 || fis->features == 0x26))) { + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); /* Com reset after secure erase or lowlevel format */ mtip_restart_port(port); + clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); return false; } @@ -1112,9 +1109,10 @@ static int mtip_exec_internal_command(struct mtip_port *port, int_cmd = mtip_get_int_command(dd); set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - port->ic_pause_timer = 0; - clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + if (fis->command == ATA_CMD_SEC_ERASE_PREP) + set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); if (atomic == GFP_KERNEL) { @@ -1251,11 +1249,11 @@ static int mtip_exec_internal_command(struct mtip_port *port, exec_ic_exit: /* Clear the allocated and active bits for the internal command. */ mtip_put_int_command(dd, int_cmd); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); if (rv >= 0 && mtip_pause_ncq(port, fis)) { /* NCQ paused */ return rv; } - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); wake_up_interruptible(&port->svc_wait); return rv; @@ -2625,18 +2623,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, readl(dd->mmio + HOST_IRQ_STAT)); size += sprintf(&buf[size], "\n"); - size += sprintf(&buf[size], "L/ Allocated : [ 0x"); - - for (n = dd->slot_groups-1; n >= 0; n--) { - if (sizeof(long) > sizeof(u32)) - group_allocated = - dd->port->allocated[n/2] >> (32*(n&1)); - else - group_allocated = dd->port->allocated[n]; - size += sprintf(&buf[size], "%08X ", group_allocated); - } - size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "L/ Commands in Q : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) { @@ -2780,48 +2766,6 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd) debugfs_remove_recursive(dd->dfs_node); } -static int mtip_free_orphan(struct driver_data *dd) -{ - struct kobject *kobj; - - if (dd->bdev) { - if (dd->bdev->bd_holders >= 1) - return -2; - - bdput(dd->bdev); - dd->bdev = NULL; - } - - mtip_hw_debugfs_exit(dd); - - spin_lock(&rssd_index_lock); - ida_remove(&rssd_index_ida, dd->index); - spin_unlock(&rssd_index_lock); - - if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) && - test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) { - put_disk(dd->disk); - } else { - if (dd->disk) { - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); - } - del_gendisk(dd->disk); - dd->disk = NULL; - } - if (dd->queue) { - dd->queue->queuedata = NULL; - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - dd->queue = NULL; - } - } - kfree(dd); - return 0; -} - /* * Perform any init/resume time hardware setup * @@ -2944,7 +2888,6 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) mtip_block_initialize(dd); return 0; } - ssleep(10); } while (time_before(jiffies, timeout)); /* Check for timeout */ @@ -2969,7 +2912,6 @@ static int mtip_service_thread(void *data) unsigned long slot, slot_start, slot_wrap; unsigned int num_cmd_slots = dd->slot_groups * 32; struct mtip_port *port = dd->port; - int ret; while (1) { if (kthread_should_stop() || @@ -2990,10 +2932,6 @@ static int mtip_service_thread(void *data) test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) goto st_out; - /* If I am an orphan, start self cleanup */ - if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags)) - break; - if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) goto st_out; @@ -3047,26 +2985,6 @@ restart_eh: } } - /* wait for pci remove to exit */ - while (1) { - if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag)) - break; - msleep_interruptible(1000); - if (kthread_should_stop()) - goto st_out; - } - - while (1) { - ret = mtip_free_orphan(dd); - if (!ret) { - /* NOTE: All data structures are invalid, do not - * access any here */ - return 0; - } - msleep_interruptible(1000); - if (kthread_should_stop()) - goto st_out; - } st_out: return 0; } @@ -3394,6 +3312,7 @@ static int mtip_hw_exit(struct driver_data *dd) /* Release the IRQ. */ irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); + msleep(1000); /* Free dma regions */ mtip_dma_free(dd); @@ -3699,6 +3618,26 @@ static const struct block_device_operations mtip_block_ops = { .owner = THIS_MODULE }; +static inline bool is_se_active(struct driver_data *dd) +{ + if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) { + if (dd->port->ic_pause_timer) { + unsigned long to = dd->port->ic_pause_timer + + msecs_to_jiffies(1000); + if (time_after(jiffies, to)) { + clear_bit(MTIP_PF_SE_ACTIVE_BIT, + &dd->port->flags); + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag); + dd->port->ic_pause_timer = 0; + wake_up_interruptible(&dd->port->svc_wait); + return false; + } + } + return true; + } + return false; +} + /* * Block layer make request function. * @@ -3716,6 +3655,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); unsigned int nents; + if (is_se_active(dd)) + return -ENODATA; + if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { @@ -3900,7 +3842,8 @@ static int mtip_block_initialize(struct driver_data *dd) dd->disk->driverfs_dev = &dd->pdev->dev; dd->disk->major = dd->major; - dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS; + dd->disk->first_minor = index * MTIP_MAX_MINORS; + dd->disk->minors = MTIP_MAX_MINORS; dd->disk->fops = &mtip_block_ops; dd->disk->private_data = dd; dd->index = index; @@ -4066,52 +4009,51 @@ static int mtip_block_remove(struct driver_data *dd) { struct kobject *kobj; - if (!dd->sr) { - mtip_hw_debugfs_exit(dd); + mtip_hw_debugfs_exit(dd); - if (dd->mtip_svc_handler) { - set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); - wake_up_interruptible(&dd->port->svc_wait); - kthread_stop(dd->mtip_svc_handler); - } + if (dd->mtip_svc_handler) { + set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); + wake_up_interruptible(&dd->port->svc_wait); + kthread_stop(dd->mtip_svc_handler); + } - /* Clean up the sysfs attributes, if created */ - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { - kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); - if (kobj) { - mtip_hw_sysfs_exit(dd, kobj); - kobject_put(kobj); - } + /* Clean up the sysfs attributes, if created */ + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) { + kobj = kobject_get(&disk_to_dev(dd->disk)->kobj); + if (kobj) { + mtip_hw_sysfs_exit(dd, kobj); + kobject_put(kobj); } + } + if (!dd->sr) mtip_standby_drive(dd); - - /* - * Delete our gendisk structure. This also removes the device - * from /dev - */ - if (dd->bdev) { - bdput(dd->bdev); - dd->bdev = NULL; - } - if (dd->disk) { - if (dd->disk->queue) { - del_gendisk(dd->disk); - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - dd->queue = NULL; - } else - put_disk(dd->disk); - } - dd->disk = NULL; - - spin_lock(&rssd_index_lock); - ida_remove(&rssd_index_ida, dd->index); - spin_unlock(&rssd_index_lock); - } else { + else dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); + + /* + * Delete our gendisk structure. This also removes the device + * from /dev + */ + if (dd->bdev) { + bdput(dd->bdev); + dd->bdev = NULL; } + if (dd->disk) { + del_gendisk(dd->disk); + if (dd->disk->queue) { + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + dd->queue = NULL; + } + put_disk(dd->disk); + } + dd->disk = NULL; + + spin_lock(&rssd_index_lock); + ida_remove(&rssd_index_ida, dd->index); + spin_unlock(&rssd_index_lock); /* De-initialize the protocol layer. */ mtip_hw_exit(dd); @@ -4140,12 +4082,12 @@ static int mtip_block_shutdown(struct driver_data *dd) dev_info(&dd->pdev->dev, "Shutting down %s ...\n", dd->disk->disk_name); + del_gendisk(dd->disk); if (dd->disk->queue) { - del_gendisk(dd->disk); blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); - } else - put_disk(dd->disk); + } + put_disk(dd->disk); dd->disk = NULL; dd->queue = NULL; } @@ -4507,6 +4449,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) "Completion workers still active!\n"); } + blk_mq_stop_hw_queues(dd->queue); /* Clean up the block layer. */ mtip_block_remove(dd); @@ -4524,10 +4467,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) list_del_init(&dd->remove_list); spin_unlock_irqrestore(&dev_lock, flags); - if (!dd->sr) - kfree(dd); - else - set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag); + kfree(dd); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index ba1b31ee22ec..3274784008eb 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -142,7 +142,6 @@ enum { MTIP_PF_SVC_THD_ACTIVE_BIT = 4, MTIP_PF_ISSUE_CMDS_BIT = 5, MTIP_PF_REBUILD_BIT = 6, - MTIP_PF_SR_CLEANUP_BIT = 7, MTIP_PF_SVC_THD_STOP_BIT = 8, /* below are bit numbers in 'dd_flag' defined in driver_data */ @@ -150,7 +149,6 @@ enum { MTIP_DDF_REMOVE_PENDING_BIT = 1, MTIP_DDF_OVER_TEMP_BIT = 2, MTIP_DDF_WRITE_PROTECT_BIT = 3, - MTIP_DDF_REMOVE_DONE_BIT = 4, MTIP_DDF_CLEANUP_BIT = 5, MTIP_DDF_RESUME_BIT = 6, MTIP_DDF_INIT_DONE_BIT = 7, @@ -412,19 +410,13 @@ struct mtip_port { * by the DMA when the driver issues internal commands. */ dma_addr_t sector_buffer_dma; - /* - * Bit significant, used to determine if a command slot has - * been allocated. i.e. the slot is in use. Bits are cleared - * when the command slot and all associated data structures - * are no longer needed. - */ + u16 *log_buf; dma_addr_t log_buf_dma; u8 *smart_buf; dma_addr_t smart_buf_dma; - unsigned long allocated[SLOTBITS_IN_LONGS]; /* * used to queue commands when an internal command is in progress * or error handling is active diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 83a7ba4a3eec..0e385d8e9b86 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -711,7 +711,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, bdev->bd_inode->i_size = 0; set_capacity(nbd->disk, 0); if (max_part > 0) - ioctl_by_bdev(bdev, BLKRRPART, 0); + blkdev_reread_part(bdev); if (nbd->disconnect) /* user requested, ignore socket errors */ return 0; return nbd->harderror; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 65cd61a4145e..6f9b7534928e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -243,6 +243,17 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) cmd = container_of(entry, struct nullb_cmd, ll_list); entry = entry->next; end_cmd(cmd); + + if (cmd->rq) { + struct request_queue *q = cmd->rq->q; + + if (!q->mq_ops && blk_queue_stopped(q)) { + spin_lock(q->queue_lock); + if (blk_queue_stopped(q)) + blk_start_queue(q); + spin_unlock(q->queue_lock); + } + } } while (entry); } @@ -257,7 +268,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) if (llist_add(&cmd->ll_list, &cq->list)) { ktime_t kt = ktime_set(0, completion_nsec); - hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); + hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED); } put_cpu(); @@ -334,6 +345,7 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req) req->special = cmd; return BLKPREP_OK; } + blk_stop_queue(q); return BLKPREP_DEFER; } diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 683dff272562..e5112714188f 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -29,6 +29,7 @@ #include <linux/kdev_t.h> #include <linux/kthread.h> #include <linux/kernel.h> +#include <linux/list_sort.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -80,6 +81,7 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; static void nvme_reset_failed_dev(struct work_struct *ws); +static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); struct async_cmd_info { @@ -102,6 +104,7 @@ struct nvme_queue { spinlock_t q_lock; struct nvme_command *sq_cmds; volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -114,7 +117,6 @@ struct nvme_queue { u8 cq_phase; u8 cqe_seen; struct async_cmd_info cmdinfo; - struct blk_mq_hw_ctx *hctx; }; /* @@ -182,9 +184,12 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, struct nvme_dev *dev = data; struct nvme_queue *nvmeq = dev->queues[0]; - WARN_ON(nvmeq->hctx); - nvmeq->hctx = hctx; + WARN_ON(hctx_idx != 0); + WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); + WARN_ON(nvmeq->tags); + hctx->driver_data = nvmeq; + nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } @@ -201,27 +206,16 @@ static int nvme_admin_init_request(void *data, struct request *req, return 0; } -static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->hctx = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[ - (hctx_idx % dev->queue_count) + 1]; - - if (!nvmeq->hctx) - nvmeq->hctx = hctx; + struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; - /* nvmeq queues are shared between namespaces. We assume here that - * blk-mq map the tags so they match up with the nvme queue tags. */ - WARN_ON(nvmeq->hctx->tags != hctx->tags); + if (!nvmeq->tags) + nvmeq->tags = &dev->tagset.tags[hctx_idx]; + WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; } @@ -307,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) ++nvmeq->dev->event_limit; - if (status == NVME_SC_SUCCESS) - dev_warn(nvmeq->q_dmadev, - "async event result %08x\n", result); + if (status != NVME_SC_SUCCESS) + return; + + switch (result & 0xff07) { + case NVME_AER_NOTICE_NS_CHANGED: + dev_info(nvmeq->q_dmadev, "rescanning\n"); + schedule_work(&nvmeq->dev->scan_work); + default: + dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); + } } static void abort_completion(struct nvme_queue *nvmeq, void *ctx, @@ -320,7 +321,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx, u16 status = le16_to_cpup(&cqe->status) >> 1; u32 result = le32_to_cpup(&cqe->result); - blk_mq_free_hctx_request(nvmeq->hctx, req); + blk_mq_free_request(req); dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); ++nvmeq->dev->abort_limit; @@ -333,14 +334,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx, cmdinfo->result = le32_to_cpup(&cqe->result); cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; queue_kthread_work(cmdinfo->worker, &cmdinfo->work); - blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req); + blk_mq_free_request(cmdinfo->req); } static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq, unsigned int tag) { - struct blk_mq_hw_ctx *hctx = nvmeq->hctx; - struct request *req = blk_mq_tag_to_rq(hctx->tags, tag); + struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag); return blk_mq_rq_to_pdu(req); } @@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, (unsigned long) rq, gfp); } -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) +static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) { const int last_prp = dev->page_size / 8 - 1; int i; @@ -605,22 +605,30 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, spin_unlock_irqrestore(req->q->queue_lock, flags); return; } - req->errors = nvme_error_status(status); + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + req->errors = status; + } else { + req->errors = nvme_error_status(status); + } } else req->errors = 0; + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + u32 result = le32_to_cpup(&cqe->result); + req->special = (void *)(uintptr_t)result; + } if (cmd_rq->aborted) - dev_warn(&nvmeq->dev->pci_dev->dev, + dev_warn(nvmeq->dev->dev, "completing aborted command with status:%04x\n", status); if (iod->nents) { - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, + dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (blk_integrity_rq(req)) { if (!rq_data_dir(req)) nvme_dif_remap(req, nvme_dif_complete); - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1, + dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); } } @@ -630,8 +638,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } /* length is in bytes. gfp flags indicates whether we may sleep. */ -int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, - gfp_t gfp) +static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, + int total_len, gfp_t gfp) { struct dma_pool *pool; int length = total_len; @@ -709,6 +717,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, return total_len; } +static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req, + struct nvme_iod *iod) +{ + struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; + + memcpy(cmnd, req->cmd, sizeof(struct nvme_command)); + cmnd->rw.command_id = req->tag; + if (req->nr_phys_segments) { + cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); + } + + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + writel(nvmeq->sq_tail, nvmeq->q_db); +} + /* * We reuse the small pool to allocate the 16-byte range here as it is not * worth having a special pool for these or additional cases to handle freeing @@ -807,11 +832,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod, return 0; } +/* + * NOTE: ns is NULL when called on the admin queue. + */ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nvme_ns *ns = hctx->queue->queuedata; struct nvme_queue *nvmeq = hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); struct nvme_iod *iod; @@ -822,15 +851,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * unless this namespace is formated such that the metadata can be * stripped/generated by the controller with PRACT=1. */ - if (ns->ms && !blk_integrity_rq(req)) { - if (!(ns->pi_type && ns->ms == 8)) { + if (ns && ns->ms && !blk_integrity_rq(req)) { + if (!(ns->pi_type && ns->ms == 8) && + req->cmd_type != REQ_TYPE_DRV_PRIV) { req->errors = -EFAULT; blk_mq_complete_request(req); return BLK_MQ_RQ_QUEUE_OK; } } - iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC); + iod = nvme_alloc_iod(req, dev, GFP_ATOMIC); if (!iod) return BLK_MQ_RQ_QUEUE_BUSY; @@ -841,8 +871,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, * as it is not worth having a special pool for these or * additional cases to handle freeing the iod. */ - range = dma_pool_alloc(nvmeq->dev->prp_small_pool, - GFP_ATOMIC, + range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &iod->first_dma); if (!range) goto retry_cmd; @@ -860,9 +889,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto retry_cmd; if (blk_rq_bytes(req) != - nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { - dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, - iod->nents, dma_dir); + nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { + dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); goto retry_cmd; } if (blk_integrity_rq(req)) { @@ -884,7 +912,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, nvme_set_info(cmd, iod, req_completion); spin_lock_irq(&nvmeq->q_lock); - if (req->cmd_flags & REQ_DISCARD) + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + nvme_submit_priv(nvmeq, req, iod); + else if (req->cmd_flags & REQ_DISCARD) nvme_submit_discard(nvmeq, ns, req, iod); else if (req->cmd_flags & REQ_FLUSH) nvme_submit_flush(nvmeq, ns, req->tag); @@ -896,10 +926,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_MQ_RQ_QUEUE_OK; error_cmd: - nvme_free_iod(nvmeq->dev, iod); + nvme_free_iod(dev, iod); return BLK_MQ_RQ_QUEUE_ERROR; retry_cmd: - nvme_free_iod(nvmeq->dev, iod); + nvme_free_iod(dev, iod); return BLK_MQ_RQ_QUEUE_BUSY; } @@ -942,15 +972,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq) return 1; } -/* Admin queue isn't initialized as a request queue. If at some point this - * happens anyway, make sure to notify the user */ -static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) -{ - WARN_ON_ONCE(1); - return BLK_MQ_RQ_QUEUE_ERROR; -} - static irqreturn_t nvme_irq(int irq, void *data) { irqreturn_t result; @@ -972,46 +993,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_WAKE_THREAD; } -struct sync_cmd_info { - struct task_struct *task; - u32 result; - int status; -}; - -static void sync_completion(struct nvme_queue *nvmeq, void *ctx, - struct nvme_completion *cqe) -{ - struct sync_cmd_info *cmdinfo = ctx; - cmdinfo->result = le32_to_cpup(&cqe->result); - cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; - wake_up_process(cmdinfo->task); -} - /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd, - u32 *result, unsigned timeout) +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout) { - struct sync_cmd_info cmdinfo; - struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = cmd_rq->nvmeq; + bool write = cmd->common.opcode & 1; + struct bio *bio = NULL; + struct request *req; + int ret; - cmdinfo.task = current; - cmdinfo.status = -EINTR; + req = blk_mq_alloc_request(q, write, GFP_KERNEL, false); + if (IS_ERR(req)) + return PTR_ERR(req); - cmd->common.command_id = req->tag; + req->cmd_type = REQ_TYPE_DRV_PRIV; + req->cmd_flags |= REQ_FAILFAST_DRIVER; + req->__data_len = 0; + req->__sector = (sector_t) -1; + req->bio = req->biotail = NULL; + + req->timeout = timeout ? timeout : ADMIN_TIMEOUT; - nvme_set_info(cmd_rq, &cmdinfo, sync_completion); + req->cmd = (unsigned char *)cmd; + req->cmd_len = sizeof(struct nvme_command); + req->special = (void *)0; - set_current_state(TASK_UNINTERRUPTIBLE); - nvme_submit_cmd(nvmeq, cmd); - schedule(); + if (buffer && bufflen) { + ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); + if (ret) + goto out; + } else if (ubuffer && bufflen) { + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); + if (ret) + goto out; + bio = req->bio; + } + blk_execute_rq(req->q, NULL, req, 0); + if (bio) + blk_rq_unmap_user(bio); if (result) - *result = cmdinfo.result; - return cmdinfo.status; + *result = (u32)(uintptr_t)req->special; + ret = req->errors; + out: + blk_mq_free_request(req); + return ret; +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, unsigned bufflen) +{ + return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0); } static int nvme_submit_async_admin_req(struct nvme_dev *dev) @@ -1033,7 +1069,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev) c.common.opcode = nvme_admin_async_event; c.common.command_id = req->tag; - blk_mq_free_hctx_request(nvmeq->hctx, req); + blk_mq_free_request(req); return __nvme_submit_cmd(nvmeq, &c); } @@ -1060,41 +1096,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, return nvme_submit_cmd(nvmeq, cmd); } -static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, - u32 *result, unsigned timeout) -{ - int res; - struct request *req; - - req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); - if (IS_ERR(req)) - return PTR_ERR(req); - res = nvme_submit_sync_cmd(req, cmd, result, timeout); - blk_mq_free_request(req); - return res; -} - -int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, - u32 *result) -{ - return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT); -} - -int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns, - struct nvme_command *cmd, u32 *result) -{ - int res; - struct request *req; - - req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT), - false); - if (IS_ERR(req)) - return PTR_ERR(req); - res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT); - blk_mq_free_request(req); - return res; -} - static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) { struct nvme_command c; @@ -1103,7 +1104,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) c.delete_queue.opcode = opcode; c.delete_queue.qid = cpu_to_le16(id); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, @@ -1112,6 +1113,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + /* + * Note: we (ab)use the fact the the prp fields survive if no data + * is attached to the request. + */ memset(&c, 0, sizeof(c)); c.create_cq.opcode = nvme_admin_create_cq; c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr); @@ -1120,7 +1125,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cq_flags = cpu_to_le16(flags); c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, @@ -1129,6 +1134,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + /* + * Note: we (ab)use the fact the the prp fields survive if no data + * is attached to the request. + */ memset(&c, 0, sizeof(c)); c.create_sq.opcode = nvme_admin_create_sq; c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); @@ -1137,7 +1146,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, c.create_sq.sq_flags = cpu_to_le16(flags); c.create_sq.cqid = cpu_to_le16(qid); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); } static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) @@ -1150,18 +1159,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, - dma_addr_t dma_addr) +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) { - struct nvme_command c; + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.cns = cpu_to_le32(1), + }; + int error; - memset(&c, 0, sizeof(c)); - c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(nsid); - c.identify.prp1 = cpu_to_le64(dma_addr); - c.identify.cns = cpu_to_le32(cns); + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); + if (!*id) + return -ENOMEM; - return nvme_submit_admin_cmd(dev, &c, NULL); + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ctrl)); + if (error) + kfree(*id); + return error; +} + +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + }; + int error; + + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, + sizeof(struct nvme_id_ns)); + if (error) + kfree(*id); + return error; } int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, @@ -1175,7 +1209,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return nvme_submit_admin_cmd(dev, &c, result); + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, + result, 0); } int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, @@ -1189,7 +1224,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - return nvme_submit_admin_cmd(dev, &c, result); + return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, + result, 0); +} + +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) +{ + struct nvme_command c = { + .common.opcode = nvme_admin_get_log_page, + .common.nsid = cpu_to_le32(0xFFFFFFFF), + .common.cdw10[0] = cpu_to_le32( + (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | + NVME_LOG_SMART), + }; + int error; + + *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); + if (!*log) + return -ENOMEM; + + error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, + sizeof(struct nvme_smart_log)); + if (error) + kfree(*log); + return error; } /** @@ -1214,8 +1272,7 @@ static void nvme_abort_req(struct request *req) if (work_busy(&dev->reset_work)) goto out; list_del_init(&dev->node); - dev_warn(&dev->pci_dev->dev, - "I/O %d QID %d timeout, reset controller\n", + dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); @@ -1254,8 +1311,7 @@ static void nvme_abort_req(struct request *req) } } -static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx, - struct request *req, void *data, bool reserved) +static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved) { struct nvme_queue *nvmeq = data; void *ctx; @@ -1352,11 +1408,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_clear_queue(struct nvme_queue *nvmeq) { - struct blk_mq_hw_ctx *hctx = nvmeq->hctx; - spin_lock_irq(&nvmeq->q_lock); - if (hctx && hctx->tags) - blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq); + if (nvmeq->tags && *nvmeq->tags) + blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq); spin_unlock_irq(&nvmeq->q_lock); } @@ -1384,22 +1438,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { - struct device *dmadev = &dev->pci_dev->dev; struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); if (!nvmeq) return NULL; - nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth), + nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) goto free_nvmeq; - nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), &nvmeq->sq_dma_addr, GFP_KERNEL); if (!nvmeq->sq_cmds) goto free_cqdma; - nvmeq->q_dmadev = dmadev; + nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", dev->instance, qid); @@ -1409,13 +1462,16 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->qid = qid; - dev->queue_count++; dev->queues[qid] = nvmeq; + /* make sure queue descriptor is set before queue count, for kthread */ + mb(); + dev->queue_count++; + return nvmeq; free_cqdma: - dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, + dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: kfree(nvmeq); @@ -1487,7 +1543,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Device not ready; aborting %s\n", enabled ? "initialisation" : "reset"); return -ENODEV; @@ -1537,7 +1593,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Device shutdown incomplete; abort shutdown\n"); return -ENODEV; } @@ -1547,10 +1603,9 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev) } static struct blk_mq_ops nvme_mq_admin_ops = { - .queue_rq = nvme_admin_queue_rq, + .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; @@ -1559,7 +1614,6 @@ static struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_init_hctx, - .exit_hctx = nvme_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; @@ -1580,7 +1634,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; dev->admin_tagset.reserved_tags = 1; dev->admin_tagset.timeout = ADMIN_TIMEOUT; - dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); + dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); dev->admin_tagset.driver_data = dev; @@ -1613,14 +1667,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; if (page_shift < dev_page_min) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Minimum device page size (%u) too large for " "host (%u)\n", 1 << dev_page_min, 1 << page_shift); return -ENODEV; } if (page_shift > dev_page_max) { - dev_info(&dev->pci_dev->dev, + dev_info(dev->dev, "Device maximum page size (%u) smaller than " "host (%u); enabling work-around\n", 1 << dev_page_max, 1 << page_shift); @@ -1668,126 +1722,43 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length) -{ - int i, err, count, nents, offset; - struct scatterlist *sg; - struct page **pages; - struct nvme_iod *iod; - - if (addr & 3) - return ERR_PTR(-EINVAL); - if (!length || length > INT_MAX - PAGE_SIZE) - return ERR_PTR(-EINVAL); - - offset = offset_in_page(addr); - count = DIV_ROUND_UP(offset + length, PAGE_SIZE); - pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); - if (!pages) - return ERR_PTR(-ENOMEM); - - err = get_user_pages_fast(addr, count, 1, pages); - if (err < count) { - count = err; - err = -EFAULT; - goto put_pages; - } - - err = -ENOMEM; - iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL); - if (!iod) - goto put_pages; - - sg = iod->sg; - sg_init_table(sg, count); - for (i = 0; i < count; i++) { - sg_set_page(&sg[i], pages[i], - min_t(unsigned, length, PAGE_SIZE - offset), - offset); - length -= (PAGE_SIZE - offset); - offset = 0; - } - sg_mark_end(&sg[i - 1]); - iod->nents = count; - - nents = dma_map_sg(&dev->pci_dev->dev, sg, count, - write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (!nents) - goto free_iod; - - kfree(pages); - return iod; - - free_iod: - kfree(iod); - put_pages: - for (i = 0; i < count; i++) - put_page(pages[i]); - kfree(pages); - return ERR_PTR(err); -} - -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod) -{ - int i; - - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, - write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - - for (i = 0; i < iod->nents; i++) - put_page(sg_page(&iod->sg[i])); -} - static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_dev *dev = ns->dev; struct nvme_user_io io; struct nvme_command c; - unsigned length, meta_len, prp_len; + unsigned length, meta_len; int status, write; - struct nvme_iod *iod; dma_addr_t meta_dma = 0; void *meta = NULL; void __user *metadata; if (copy_from_user(&io, uio, sizeof(io))) return -EFAULT; - length = (io.nblocks + 1) << ns->lba_shift; - meta_len = (io.nblocks + 1) * ns->ms; - - if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext) - return -EINVAL; - else if (meta_len && ns->ext) { - length += meta_len; - meta_len = 0; - } - - metadata = (void __user *)(unsigned long)io.metadata; - - write = io.opcode & 1; switch (io.opcode) { case nvme_cmd_write: case nvme_cmd_read: case nvme_cmd_compare: - iod = nvme_map_user_pages(dev, write, io.addr, length); break; default: return -EINVAL; } - if (IS_ERR(iod)) - return PTR_ERR(iod); + length = (io.nblocks + 1) << ns->lba_shift; + meta_len = (io.nblocks + 1) * ns->ms; + metadata = (void __user *)(unsigned long)io.metadata; + write = io.opcode & 1; - prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL); - if (length != prp_len) { - status = -ENOMEM; - goto unmap; + if (ns->ext) { + length += meta_len; + meta_len = 0; } if (meta_len) { - meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, + if (((io.metadata & 3) || !io.metadata) && !ns->ext) + return -EINVAL; + + meta = dma_alloc_coherent(dev->dev, meta_len, &meta_dma, GFP_KERNEL); if (!meta) { @@ -1813,19 +1784,17 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.reftag = cpu_to_le32(io.reftag); c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.rw.prp2 = cpu_to_le64(iod->first_dma); c.rw.metadata = cpu_to_le64(meta_dma); - status = nvme_submit_io_cmd(dev, ns, &c, NULL); + + status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, + (void __user *)io.addr, length, NULL, 0); unmap: - nvme_unmap_user_pages(dev, write, iod); - nvme_free_iod(dev, iod); if (meta) { if (status == NVME_SC_SUCCESS && !write) { if (copy_to_user(metadata, meta, meta_len)) status = -EFAULT; } - dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); + dma_free_coherent(dev->dev, meta_len, meta, meta_dma); } return status; } @@ -1835,9 +1804,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, { struct nvme_passthru_cmd cmd; struct nvme_command c; - int status, length; - struct nvme_iod *uninitialized_var(iod); - unsigned timeout; + unsigned timeout = 0; + int status; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1857,46 +1825,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); - length = cmd.data_len; - if (cmd.data_len) { - iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr, - length); - if (IS_ERR(iod)) - return PTR_ERR(iod); - length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); - c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.common.prp2 = cpu_to_le64(iod->first_dma); - } - - timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : - ADMIN_TIMEOUT; - - if (length != cmd.data_len) - status = -ENOMEM; - else if (ns) { - struct request *req; - - req = blk_mq_alloc_request(ns->queue, WRITE, - (GFP_KERNEL|__GFP_WAIT), false); - if (IS_ERR(req)) - status = PTR_ERR(req); - else { - status = nvme_submit_sync_cmd(req, &c, &cmd.result, - timeout); - blk_mq_free_request(req); - } - } else - status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout); + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); - if (cmd.data_len) { - nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); - nvme_free_iod(dev, iod); + status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, + NULL, (void __user *)cmd.addr, cmd.data_len, + &cmd.result, timeout); + if (status >= 0) { + if (put_user(cmd.result, &ucmd->result)) + return -EFAULT; } - if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result, - sizeof(cmd.result))) - status = -EFAULT; - return status; } @@ -1988,23 +1927,18 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_ns *ns = disk->private_data; struct nvme_dev *dev = ns->dev; struct nvme_id_ns *id; - dma_addr_t dma_addr; u8 lbaf, pi_type; u16 old_ms; unsigned short bs; - id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, - GFP_KERNEL); - if (!id) { - dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n", - __func__); - return 0; + if (nvme_identify_ns(dev, ns->ns_id, &id)) { + dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__, + dev->instance, ns->ns_id); + return -ENODEV; } - if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) { - dev_warn(&dev->pci_dev->dev, - "identify failed ns:%d, setting capacity to 0\n", - ns->ns_id); - memset(id, 0, sizeof(*id)); + if (id->ncap == 0) { + kfree(id); + return -ENODEV; } old_ms = ns->ms; @@ -2038,7 +1972,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) !ns->ext) nvme_init_integrity(ns); - if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) + if (ns->ms && !blk_get_integrity(disk)) set_capacity(disk, 0); else set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); @@ -2046,7 +1980,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (dev->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); - dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr); + kfree(id); return 0; } @@ -2073,7 +2007,7 @@ static int nvme_kthread(void *data) if (work_busy(&dev->reset_work)) continue; list_del_init(&dev->node); - dev_warn(&dev->pci_dev->dev, + dev_warn(dev->dev, "Failed status: %x, reset controller\n", readl(&dev->bar->csts)); dev->reset_workfn = nvme_reset_failed_dev; @@ -2105,7 +2039,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; - int node = dev_to_node(&dev->pci_dev->dev); + int node = dev_to_node(dev->dev); ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -2153,11 +2087,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) * requires it. */ set_capacity(disk, 0); - nvme_revalidate_disk(ns->disk); + if (nvme_revalidate_disk(ns->disk)) + goto out_free_disk; + add_disk(ns->disk); if (ns->ms) revalidate_disk(ns->disk); return; + out_free_disk: + kfree(disk); + list_del(&ns->list); out_free_queue: blk_cleanup_queue(ns->queue); out_free_ns: @@ -2188,8 +2127,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) if (status < 0) return status; if (status > 0) { - dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", - status); + dev_err(dev->dev, "Could not set queue count (%d)\n", status); return 0; } return min(result & 0xffff, result >> 16) + 1; @@ -2203,7 +2141,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = dev->queues[0]; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); int result, i, vecs, nr_io_queues, size; nr_io_queues = num_possible_cpus(); @@ -2275,6 +2213,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } +static void nvme_free_namespace(struct nvme_ns *ns) +{ + list_del(&ns->list); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + put_disk(ns->disk); + kfree(ns); +} + +static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); + struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); + + return nsa->ns_id - nsb->ns_id; +} + +static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &dev->namespaces, list) { + if (ns->ns_id == nsid) + return ns; + if (ns->ns_id > nsid) + break; + } + return NULL; +} + +static inline bool nvme_io_incapable(struct nvme_dev *dev) +{ + return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS || + dev->online_queues < 2); +} + +static void nvme_ns_remove(struct nvme_ns *ns) +{ + bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue); + + if (kill) + blk_set_queue_dying(ns->queue); + if (ns->disk->flags & GENHD_FL_UP) { + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); + del_gendisk(ns->disk); + } + if (kill || !blk_queue_dying(ns->queue)) { + blk_mq_abort_requeue_list(ns->queue); + blk_cleanup_queue(ns->queue); + } +} + +static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) +{ + struct nvme_ns *ns, *next; + unsigned i; + + for (i = 1; i <= nn; i++) { + ns = nvme_find_ns(dev, i); + if (ns) { + if (revalidate_disk(ns->disk)) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } else + nvme_alloc_ns(dev, i); + } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { + if (ns->ns_id > nn) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } + list_sort(NULL, &dev->namespaces, ns_cmp); +} + +static void nvme_dev_scan(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); + struct nvme_id_ctrl *ctrl; + + if (!dev->tagset.tags) + return; + if (nvme_identify_ctrl(dev, &ctrl)) + return; + nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); + kfree(ctrl); +} + /* * Return: error value if an error occurred setting up the queues or calling * Identify Device. 0 if these succeeded, even if adding some of the @@ -2283,26 +2314,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ static int nvme_dev_add(struct nvme_dev *dev) { - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); int res; - unsigned nn, i; + unsigned nn; struct nvme_id_ctrl *ctrl; - void *mem; - dma_addr_t dma_addr; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; - mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL); - if (!mem) - return -ENOMEM; - - res = nvme_identify(dev, 0, 1, dma_addr); + res = nvme_identify_ctrl(dev, &ctrl); if (res) { - dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); - dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); + dev_err(dev->dev, "Identify Controller failed (%d)\n", res); return -EIO; } - ctrl = mem; nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; @@ -2324,12 +2347,12 @@ static int nvme_dev_add(struct nvme_dev *dev) } else dev->max_hw_sectors = max_hw_sectors; } - dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); + kfree(ctrl); dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev); + dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; dev->tagset.cmd_size = nvme_cmd_size(dev); @@ -2339,9 +2362,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; - for (i = 1; i <= nn; i++) - nvme_alloc_ns(dev, i); - + schedule_work(&dev->scan_work); return 0; } @@ -2349,7 +2370,7 @@ static int nvme_dev_map(struct nvme_dev *dev) { u64 cap; int bars, result = -ENOMEM; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_enable_device_mem(pdev)) return result; @@ -2363,8 +2384,8 @@ static int nvme_dev_map(struct nvme_dev *dev) if (pci_request_selected_regions(pdev, bars, "nvme")) goto disable_pci; - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) && - dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) + if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) goto disable; dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); @@ -2405,19 +2426,21 @@ static int nvme_dev_map(struct nvme_dev *dev) static void nvme_dev_unmap(struct nvme_dev *dev) { - if (dev->pci_dev->msi_enabled) - pci_disable_msi(dev->pci_dev); - else if (dev->pci_dev->msix_enabled) - pci_disable_msix(dev->pci_dev); + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->msi_enabled) + pci_disable_msi(pdev); + else if (pdev->msix_enabled) + pci_disable_msix(pdev); if (dev->bar) { iounmap(dev->bar); dev->bar = NULL; - pci_release_regions(dev->pci_dev); + pci_release_regions(pdev); } - if (pci_is_enabled(dev->pci_dev)) - pci_disable_device(dev->pci_dev); + if (pci_is_enabled(pdev)) + pci_disable_device(pdev); } struct nvme_delq_ctx { @@ -2536,7 +2559,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) &worker, "nvme%d", dev->instance); if (IS_ERR(kworker_task)) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Failed to create queue del task\n"); for (i = dev->queue_count - 1; i > 0; i--) nvme_disable_queue(dev, i); @@ -2587,9 +2610,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev) list_for_each_entry(ns, &dev->namespaces, list) { blk_mq_freeze_queue_start(ns->queue); - spin_lock(ns->queue->queue_lock); + spin_lock_irq(ns->queue->queue_lock); queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); - spin_unlock(ns->queue->queue_lock); + spin_unlock_irq(ns->queue->queue_lock); blk_mq_cancel_requeue_work(ns->queue); blk_mq_stop_hw_queues(ns->queue); @@ -2639,29 +2662,19 @@ static void nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns; - list_for_each_entry(ns, &dev->namespaces, list) { - if (ns->disk->flags & GENHD_FL_UP) { - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); - del_gendisk(ns->disk); - } - if (!blk_queue_dying(ns->queue)) { - blk_mq_abort_requeue_list(ns->queue); - blk_cleanup_queue(ns->queue); - } - } + list_for_each_entry(ns, &dev->namespaces, list) + nvme_ns_remove(ns); } static int nvme_setup_prp_pools(struct nvme_dev *dev) { - struct device *dmadev = &dev->pci_dev->dev; - dev->prp_page_pool = dma_pool_create("prp list page", dmadev, + dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, PAGE_SIZE, PAGE_SIZE, 0); if (!dev->prp_page_pool) return -ENOMEM; /* Optimisation for I/Os between 4k and 128k */ - dev->prp_small_pool = dma_pool_create("prp list 256", dmadev, + dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev, 256, 256, 0); if (!dev->prp_small_pool) { dma_pool_destroy(dev->prp_page_pool); @@ -2709,23 +2722,15 @@ static void nvme_free_namespaces(struct nvme_dev *dev) { struct nvme_ns *ns, *next; - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - list_del(&ns->list); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); - } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) + nvme_free_namespace(ns); } static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); - pci_dev_put(dev->pci_dev); + put_device(dev->dev); put_device(dev->device); nvme_free_namespaces(dev); nvme_release_instance(dev); @@ -2781,6 +2786,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -ENOTTY; ns = list_first_entry(&dev->namespaces, struct nvme_ns, list); return nvme_user_cmd(dev, ns, (void __user *)arg); + case NVME_IOCTL_RESET: + dev_warn(dev->dev, "resetting controller\n"); + return nvme_reset(dev); default: return -ENOTTY; } @@ -2802,11 +2810,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev) for (i = 0; i < dev->online_queues; i++) { nvmeq = dev->queues[i]; - if (!nvmeq->hctx) + if (!nvmeq->tags || !(*nvmeq->tags)) continue; irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - nvmeq->hctx->cpumask); + blk_mq_tags_cpumask(*nvmeq->tags)); } } @@ -2869,7 +2877,7 @@ static int nvme_dev_start(struct nvme_dev *dev) static int nvme_remove_dead_ctrl(void *arg) { struct nvme_dev *dev = (struct nvme_dev *)arg; - struct pci_dev *pdev = dev->pci_dev; + struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); @@ -2899,6 +2907,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); + schedule_work(&dev->scan_work); nvme_set_irq_hints(dev); } return 0; @@ -2908,11 +2917,11 @@ static void nvme_dev_reset(struct nvme_dev *dev) { nvme_dev_shutdown(dev); if (nvme_dev_resume(dev)) { - dev_warn(&dev->pci_dev->dev, "Device failed to resume\n"); + dev_warn(dev->dev, "Device failed to resume\n"); kref_get(&dev->kref); if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", dev->instance))) { - dev_err(&dev->pci_dev->dev, + dev_err(dev->dev, "Failed to start controller remove task\n"); kref_put(&dev->kref, nvme_free_dev); } @@ -2931,6 +2940,44 @@ static void nvme_reset_workfn(struct work_struct *work) dev->reset_workfn(work); } +static int nvme_reset(struct nvme_dev *dev) +{ + int ret = -EBUSY; + + if (!dev->admin_q || blk_queue_dying(dev->admin_q)) + return -ENODEV; + + spin_lock(&dev_list_lock); + if (!work_pending(&dev->reset_work)) { + dev->reset_workfn = nvme_reset_failed_dev; + queue_work(nvme_workq, &dev->reset_work); + ret = 0; + } + spin_unlock(&dev_list_lock); + + if (!ret) { + flush_work(&dev->reset_work); + return 0; + } + + return ret; +} + +static ssize_t nvme_sysfs_reset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_dev *ndev = dev_get_drvdata(dev); + int ret; + + ret = nvme_reset(ndev); + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); + static void nvme_async_probe(struct work_struct *work); static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -2956,7 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->reset_workfn = nvme_reset_failed_dev; INIT_WORK(&dev->reset_work, nvme_reset_workfn); - dev->pci_dev = pci_dev_get(pdev); + dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); if (result) @@ -2975,18 +3022,27 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } get_device(dev->device); + dev_set_drvdata(dev->device, dev); + + result = device_create_file(dev->device, &dev_attr_reset_controller); + if (result) + goto put_dev; INIT_LIST_HEAD(&dev->node); + INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; + put_dev: + device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); + put_device(dev->device); release_pools: nvme_release_prp_pools(dev); release: nvme_release_instance(dev); put_pci: - pci_dev_put(dev->pci_dev); + put_device(dev->dev); free: kfree(dev->queues); kfree(dev->entry); @@ -3011,10 +3067,12 @@ static void nvme_async_probe(struct work_struct *work) nvme_set_irq_hints(dev); return; reset: + spin_lock(&dev_list_lock); if (!work_busy(&dev->reset_work)) { dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); } + spin_unlock(&dev_list_lock); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -3044,6 +3102,8 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + flush_work(&dev->scan_work); + device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); nvme_dev_remove_admin(dev); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 44f2514fb775..e5a63f06fb0f 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -41,15 +41,13 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/types.h> +#include <asm/unaligned.h> #include <scsi/sg.h> #include <scsi/scsi.h> static int sg_version_num = 30534; /* 2 digits for each component */ -#define SNTI_TRANSLATION_SUCCESS 0 -#define SNTI_INTERNAL_ERROR 1 - /* VPD Page Codes */ #define VPD_SUPPORTED_PAGES 0x00 #define VPD_SERIAL_NUMBER 0x80 @@ -58,49 +56,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define VPD_BLOCK_LIMITS 0xB0 #define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1 -/* CDB offsets */ -#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET 6 -#define REPORT_LUNS_SR_OFFSET 2 -#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET 10 -#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET 4 -#define REQUEST_SENSE_DESC_OFFSET 1 -#define REQUEST_SENSE_DESC_MASK 0x01 -#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE 1 -#define INQUIRY_EVPD_BYTE_OFFSET 1 -#define INQUIRY_PAGE_CODE_BYTE_OFFSET 2 -#define INQUIRY_EVPD_BIT_MASK 1 -#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET 3 -#define START_STOP_UNIT_CDB_IMMED_OFFSET 1 -#define START_STOP_UNIT_CDB_IMMED_MASK 0x1 -#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET 3 -#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK 0xF -#define START_STOP_UNIT_CDB_POWER_COND_OFFSET 4 -#define START_STOP_UNIT_CDB_POWER_COND_MASK 0xF0 -#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET 4 -#define START_STOP_UNIT_CDB_NO_FLUSH_MASK 0x4 -#define START_STOP_UNIT_CDB_START_OFFSET 4 -#define START_STOP_UNIT_CDB_START_MASK 0x1 -#define WRITE_BUFFER_CDB_MODE_OFFSET 1 -#define WRITE_BUFFER_CDB_MODE_MASK 0x1F -#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET 2 -#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET 3 -#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET 6 -#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET 1 -#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK 0xC0 -#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT 6 -#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET 1 -#define FORMAT_UNIT_CDB_LONG_LIST_MASK 0x20 -#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET 1 -#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK 0x10 +/* format unit paramter list offsets */ #define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4 #define FORMAT_UNIT_LONG_PARM_LIST_LEN 8 #define FORMAT_UNIT_PROT_INT_OFFSET 3 #define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0 #define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07 -#define UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET 7 /* Misc. defines */ -#define NIBBLE_SHIFT 4 #define FIXED_SENSE_DATA 0x70 #define DESC_FORMAT_SENSE_DATA 0x72 #define FIXED_SENSE_DATA_ADD_LENGTH 10 @@ -144,27 +107,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C #define RESERVED_FIELD 0 -/* SCSI READ/WRITE Defines */ -#define IO_CDB_WP_MASK 0xE0 -#define IO_CDB_WP_SHIFT 5 -#define IO_CDB_FUA_MASK 0x8 -#define IO_6_CDB_LBA_OFFSET 0 -#define IO_6_CDB_LBA_MASK 0x001FFFFF -#define IO_6_CDB_TX_LEN_OFFSET 4 -#define IO_6_DEFAULT_TX_LEN 256 -#define IO_10_CDB_LBA_OFFSET 2 -#define IO_10_CDB_TX_LEN_OFFSET 7 -#define IO_10_CDB_WP_OFFSET 1 -#define IO_10_CDB_FUA_OFFSET 1 -#define IO_12_CDB_LBA_OFFSET 2 -#define IO_12_CDB_TX_LEN_OFFSET 6 -#define IO_12_CDB_WP_OFFSET 1 -#define IO_12_CDB_FUA_OFFSET 1 -#define IO_16_CDB_FUA_OFFSET 1 -#define IO_16_CDB_WP_OFFSET 1 -#define IO_16_CDB_LBA_OFFSET 2 -#define IO_16_CDB_TX_LEN_OFFSET 10 - /* Mode Sense/Select defines */ #define MODE_PAGE_INFO_EXCEP 0x1C #define MODE_PAGE_CACHING 0x08 @@ -179,23 +121,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define MODE_PAGE_INF_EXC_LEN 0x0C #define MODE_PAGE_ALL_LEN 0x54 #define MODE_SENSE6_MPH_SIZE 4 -#define MODE_SENSE6_ALLOC_LEN_OFFSET 4 -#define MODE_SENSE_PAGE_CONTROL_OFFSET 2 #define MODE_SENSE_PAGE_CONTROL_MASK 0xC0 #define MODE_SENSE_PAGE_CODE_OFFSET 2 #define MODE_SENSE_PAGE_CODE_MASK 0x3F -#define MODE_SENSE_LLBAA_OFFSET 1 #define MODE_SENSE_LLBAA_MASK 0x10 #define MODE_SENSE_LLBAA_SHIFT 4 -#define MODE_SENSE_DBD_OFFSET 1 #define MODE_SENSE_DBD_MASK 8 #define MODE_SENSE_DBD_SHIFT 3 #define MODE_SENSE10_MPH_SIZE 8 -#define MODE_SENSE10_ALLOC_LEN_OFFSET 7 -#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET 1 -#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET 1 -#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET 4 -#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET 7 #define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10 #define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1 #define MODE_SELECT_6_BD_OFFSET 3 @@ -221,14 +154,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07 #define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F #define LOG_PAGE_TEMPERATURE_PAGE 0x0D -#define LOG_SENSE_CDB_SP_OFFSET 1 #define LOG_SENSE_CDB_SP_NOT_ENABLED 0 -#define LOG_SENSE_CDB_PC_OFFSET 2 #define LOG_SENSE_CDB_PC_MASK 0xC0 #define LOG_SENSE_CDB_PC_SHIFT 6 #define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1 #define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F -#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET 7 #define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8 #define LOG_INFO_EXCP_PAGE_LENGTH 0xC #define REMAINING_TEMP_PAGE_LENGTH 0xC @@ -278,77 +208,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08 #define SCSI_ASCQ_INVALID_LUN_ID 0x09 -/** - * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to - * enable DPOFUA support type 0x10 value. - */ -#define DEVICE_SPECIFIC_PARAMETER 0 -#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR) - -/* MACROs to extract information from CDBs */ - -#define GET_OPCODE(cdb) cdb[0] - -#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0) - -#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0)) - -#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \ -(cdb[index + 1] << 8) | \ -(cdb[index + 2] << 0)) - -#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \ -(cdb[index + 1] << 16) | \ -(cdb[index + 2] << 8) | \ -(cdb[index + 3] << 0)) - -#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \ -(((u64)cdb[index + 1]) << 48) | \ -(((u64)cdb[index + 2]) << 40) | \ -(((u64)cdb[index + 3]) << 32) | \ -(((u64)cdb[index + 4]) << 24) | \ -(((u64)cdb[index + 5]) << 16) | \ -(((u64)cdb[index + 6]) << 8) | \ -(((u64)cdb[index + 7]) << 0)) - -/* Inquiry Helper Macros */ -#define GET_INQ_EVPD_BIT(cdb) \ -((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) & \ -INQUIRY_EVPD_BIT_MASK) ? 1 : 0) - -#define GET_INQ_PAGE_CODE(cdb) \ -(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET)) - -#define GET_INQ_ALLOC_LENGTH(cdb) \ -(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET)) - -/* Report LUNs Helper Macros */ -#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb) \ -(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET)) - -/* Read Capacity Helper Macros */ -#define GET_READ_CAP_16_ALLOC_LENGTH(cdb) \ -(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET)) - -#define IS_READ_CAP_16(cdb) \ -((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0) - -/* Request Sense Helper Macros */ -#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb) \ -(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET)) - -/* Mode Sense Helper Macros */ -#define GET_MODE_SENSE_DBD(cdb) \ -((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >> \ -MODE_SENSE_DBD_SHIFT) - -#define GET_MODE_SENSE_LLBAA(cdb) \ -((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) & \ -MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT) - -#define GET_MODE_SENSE_MPH_SIZE(cdb10) \ -(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE) - +/* copied from drivers/usb/gadget/function/storage_common.h */ +static inline u32 get_unaligned_be24(u8 *buf) +{ + return 0xffffff & (u32) get_unaligned_be32(buf - 1); +} /* Struct to gather data that needs to be extracted from a SCSI CDB. Not conforming to any particular CDB variant, but compatible with all. */ @@ -369,8 +233,6 @@ struct nvme_trans_io_cdb { static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, unsigned long n) { - int res = SNTI_TRANSLATION_SUCCESS; - unsigned long not_copied; int i; void *index = from; size_t remaining = n; @@ -380,29 +242,25 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, struct sg_iovec sgl; for (i = 0; i < hdr->iovec_count; i++) { - not_copied = copy_from_user(&sgl, hdr->dxferp + + if (copy_from_user(&sgl, hdr->dxferp + i * sizeof(struct sg_iovec), - sizeof(struct sg_iovec)); - if (not_copied) + sizeof(struct sg_iovec))) return -EFAULT; xfer_len = min(remaining, sgl.iov_len); - not_copied = copy_to_user(sgl.iov_base, index, - xfer_len); - if (not_copied) { - res = -EFAULT; - break; - } + if (copy_to_user(sgl.iov_base, index, xfer_len)) + return -EFAULT; + index += xfer_len; remaining -= xfer_len; if (remaining == 0) break; } - return res; + return 0; } - not_copied = copy_to_user(hdr->dxferp, from, n); - if (not_copied) - res = -EFAULT; - return res; + + if (copy_to_user(hdr->dxferp, from, n)) + return -EFAULT; + return 0; } /* Copy data from userspace memory */ @@ -410,8 +268,6 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from, static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, unsigned long n) { - int res = SNTI_TRANSLATION_SUCCESS; - unsigned long not_copied; int i; void *index = to; size_t remaining = n; @@ -421,30 +277,24 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, struct sg_iovec sgl; for (i = 0; i < hdr->iovec_count; i++) { - not_copied = copy_from_user(&sgl, hdr->dxferp + + if (copy_from_user(&sgl, hdr->dxferp + i * sizeof(struct sg_iovec), - sizeof(struct sg_iovec)); - if (not_copied) + sizeof(struct sg_iovec))) return -EFAULT; xfer_len = min(remaining, sgl.iov_len); - not_copied = copy_from_user(index, sgl.iov_base, - xfer_len); - if (not_copied) { - res = -EFAULT; - break; - } + if (copy_from_user(index, sgl.iov_base, xfer_len)) + return -EFAULT; index += xfer_len; remaining -= xfer_len; if (remaining == 0) break; } - return res; + return 0; } - not_copied = copy_from_user(to, hdr->dxferp, n); - if (not_copied) - res = -EFAULT; - return res; + if (copy_from_user(to, hdr->dxferp, n)) + return -EFAULT; + return 0; } /* Status/Sense Buffer Writeback */ @@ -452,7 +302,6 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to, static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key, u8 asc, u8 ascq) { - int res = SNTI_TRANSLATION_SUCCESS; u8 xfer_len; u8 resp[DESC_FMT_SENSE_DATA_SIZE]; @@ -477,25 +326,29 @@ static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key, xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE); hdr->sb_len_wr = xfer_len; if (copy_to_user(hdr->sbp, resp, xfer_len) > 0) - res = -EFAULT; + return -EFAULT; } - return res; + return 0; } +/* + * Take a status code from a lowlevel routine, and if it was a positive NVMe + * error code update the sense data based on it. In either case the passed + * in value is returned again, unless an -EFAULT from copy_to_user overrides + * it. + */ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc) { u8 status, sense_key, asc, ascq; - int res = SNTI_TRANSLATION_SUCCESS; + int res; /* For non-nvme (Linux) errors, simply return the error code */ if (nvme_sc < 0) return nvme_sc; /* Mask DNR, More, and reserved fields */ - nvme_sc &= 0x7FF; - - switch (nvme_sc) { + switch (nvme_sc & 0x7FF) { /* Generic Command Status */ case NVME_SC_SUCCESS: status = SAM_STAT_GOOD; @@ -662,8 +515,7 @@ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc) } res = nvme_trans_completion(hdr, status, sense_key, asc, ascq); - - return res; + return res ? res : nvme_sc; } /* INQUIRY Helper Functions */ @@ -673,10 +525,8 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, int alloc_len) { struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; int xfer_len; u8 resp_data_format = 0x02; @@ -684,31 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 cmdque = 0x01 << 1; u8 fw_offset = sizeof(dev->firmware_rev); - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } - /* nvme ns identify - use DPS value for PROTECT field */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); - /* - * If nvme_sc was -ve, res will be -ve here. - * If nvme_sc was +ve, the status would bace been translated, and res - * can only be 0 or -ve. - * - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc - * - If -ve, return because its a Linux error. - */ if (res) - goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } - id_ns = mem; - (id_ns->dps) ? (protect = 0x01) : (protect = 0); + return res; + + if (id_ns->dps) + protect = 0x01; + else + protect = 0; + kfree(id_ns); memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); inq_response[2] = VERSION_SPC_4; @@ -725,20 +561,13 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); - out_dma: - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; int xfer_len; memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); @@ -752,9 +581,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns, inq_response[9] = INQ_BDEV_LIMITS_PAGE; xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_unit_serial_page(struct nvme_ns *ns, @@ -762,7 +589,6 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns, int alloc_len) { struct nvme_dev *dev = ns->dev; - int res = SNTI_TRANSLATION_SUCCESS; int xfer_len; memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); @@ -771,53 +597,42 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns, strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) { struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; int xfer_len; __be32 tmp_id = cpu_to_be32(ns->ns_id); - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } - memset(inq_response, 0, alloc_len); inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */ if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) { - struct nvme_id_ns *id_ns = mem; - void *eui = id_ns->eui64; - int len = sizeof(id_ns->eui64); + struct nvme_id_ns *id_ns; + void *eui; + int len; - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } + return res; + eui = id_ns->eui64; + len = sizeof(id_ns->eui64); if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) { if (bitmap_empty(eui, len * 8)) { eui = id_ns->nguid; len = sizeof(id_ns->nguid); } } - if (bitmap_empty(eui, len * 8)) + if (bitmap_empty(eui, len * 8)) { + kfree(id_ns); goto scsi_string; + } inq_response[3] = 4 + len; /* Page Length */ /* Designation Descriptor start */ @@ -826,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, inq_response[6] = 0x00; /* Rsvd */ inq_response[7] = len; /* Designator Length */ memcpy(&inq_response[8], eui, len); + kfree(id_ns); } else { scsi_string: if (alloc_len < 72) { - res = nvme_trans_completion(hdr, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out_free; } inq_response[3] = 0x48; /* Page Length */ /* Designation Descriptor start */ @@ -842,30 +657,22 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, inq_response[6] = 0x00; /* Rsvd */ inq_response[7] = 0x44; /* Designator Length */ - sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor); + sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor); memcpy(&inq_response[12], dev->model, sizeof(dev->model)); sprintf(&inq_response[52], "%04x", tmp_id); memcpy(&inq_response[56], dev->serial, sizeof(dev->serial)); } xfer_len = alloc_len; - res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - - out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); - out_dma: - return res; + return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); } static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { u8 *inq_response; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; struct nvme_id_ns *id_ns; int xfer_len; @@ -878,45 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 luiclr = 0x01; inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); - if (inq_response == NULL) { - res = -ENOMEM; - goto out_mem; - } - - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + if (inq_response == NULL) + return -ENOMEM; - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } - id_ns = mem; - spt = spt_lut[(id_ns->dpc) & 0x07] << 3; - (id_ns->dps) ? (protect = 0x01) : (protect = 0); + goto out_free_inq; + + spt = spt_lut[id_ns->dpc & 0x07] << 3; + if (id_ns->dps) + protect = 0x01; + else + protect = 0; + kfree(id_ns); + grd_chk = protect << 2; app_chk = protect << 1; ref_chk = protect; - /* nvme controller identify */ - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_free; - if (nvme_sc) { - res = nvme_sc; - goto out_free; - } - id_ctrl = mem; + goto out_free_inq; + v_sup = id_ctrl->vwc; + kfree(id_ctrl); memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */ @@ -932,12 +726,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); - out_free: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); - out_dma: + out_free_inq: kfree(inq_response); - out_mem: return res; } @@ -965,7 +755,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { u8 *inq_response; - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); @@ -994,7 +784,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *log_response; @@ -1022,47 +812,30 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *log_response; - struct nvme_command c; struct nvme_dev *dev = ns->dev; struct nvme_smart_log *smart_log; - dma_addr_t dma_addr; - void *mem; u8 temp_c; u16 temp_k; log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); - if (log_response == NULL) { - res = -ENOMEM; - goto out_mem; - } + if (log_response == NULL) + return -ENOMEM; - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_smart_log), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + res = nvme_get_log_page(dev, &smart_log); + if (res < 0) + goto out_free_response; - /* Get SMART Log Page */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xFFFFFFFF); - c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; } else { - smart_log = mem; temp_k = (smart_log->temperature[1] << 8) + (smart_log->temperature[0]); temp_c = temp_k - KELVIN_TEMP_FACTOR; } + kfree(smart_log); log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE; /* Subpage=0x00, Page Length MSB=0 */ @@ -1078,59 +851,39 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), - mem, dma_addr); - out_dma: + out_free_response: kfree(log_response); - out_mem: return res; } static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, int alloc_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *log_response; - struct nvme_command c; struct nvme_dev *dev = ns->dev; struct nvme_smart_log *smart_log; - dma_addr_t dma_addr; - void *mem; u32 feature_resp; u8 temp_c_cur, temp_c_thresh; u16 temp_k; log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); - if (log_response == NULL) { - res = -ENOMEM; - goto out_mem; - } + if (log_response == NULL) + return -ENOMEM; - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_smart_log), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out_dma; - } + res = nvme_get_log_page(dev, &smart_log); + if (res < 0) + goto out_free_response; - /* Get SMART Log Page */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xFFFFFFFF); - c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); - res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; } else { - smart_log = mem; temp_k = (smart_log->temperature[1] << 8) + (smart_log->temperature[0]); temp_c_cur = temp_k - KELVIN_TEMP_FACTOR; } + kfree(smart_log); /* Get Features for Temp Threshold */ res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0, @@ -1159,11 +912,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH); res = nvme_trans_copy_to_user(hdr, log_response, xfer_len); - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log), - mem, dma_addr); - out_dma: + out_free_response: kfree(log_response); - out_mem: return res; } @@ -1174,59 +924,45 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa, { /* Quick check to make sure I don't stomp on my own memory... */ if ((cdb10 && len < 8) || (!cdb10 && len < 4)) - return SNTI_INTERNAL_ERROR; + return -EINVAL; if (cdb10) { resp[0] = (mode_data_length & 0xFF00) >> 8; resp[1] = (mode_data_length & 0x00FF); - /* resp[2] and [3] are zero */ + resp[3] = 0x10 /* DPOFUA */; resp[4] = llbaa; resp[5] = RESERVED_FIELD; resp[6] = (blk_desc_len & 0xFF00) >> 8; resp[7] = (blk_desc_len & 0x00FF); } else { resp[0] = (mode_data_length & 0x00FF); - /* resp[1] and [2] are zero */ + resp[2] = 0x10 /* DPOFUA */; resp[3] = (blk_desc_len & 0x00FF); } - return SNTI_TRANSLATION_SUCCESS; + return 0; } static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len, u8 llbaa) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 flbas; u32 lba_length; if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) - return SNTI_INTERNAL_ERROR; - - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } + return -EINVAL; - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } - id_ns = mem; + return res; + flbas = (id_ns->flbas) & 0x0F; lba_length = (1 << (id_ns->lbaf[flbas].ds)); @@ -1246,10 +982,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, memcpy(&resp[12], &tmp_len, sizeof(u32)); } - out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); - out: + kfree(id_ns); return res; } @@ -1258,7 +991,7 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns, int len) { if (len < MODE_PAGE_CONTROL_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; resp[0] = MODE_PAGE_CONTROL; resp[1] = MODE_PAGE_CONTROL_LEN_FIELD; @@ -1272,78 +1005,69 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns, resp[9] = 0xFF; /* Bytes 10,11: Extended selftest completion time = 0x0000 */ - return SNTI_TRANSLATION_SUCCESS; + return 0; } static int nvme_trans_fill_caching_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; u32 feature_resp; u8 vwc; if (len < MODE_PAGE_CACHING_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0, &feature_resp); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out; - if (nvme_sc) { - res = nvme_sc; - goto out; - } + return res; + vwc = feature_resp & 0x00000001; resp[0] = MODE_PAGE_CACHING; resp[1] = MODE_PAGE_CACHING_LEN_FIELD; resp[2] = vwc << 2; - - out: - return res; + return 0; } static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; - if (len < MODE_PAGE_POW_CND_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; resp[0] = MODE_PAGE_POWER_CONDITION; resp[1] = MODE_PAGE_POW_CND_LEN_FIELD; /* All other bytes are zero */ - return res; + return 0; } static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; - if (len < MODE_PAGE_INF_EXC_LEN) - return SNTI_INTERNAL_ERROR; + return -EINVAL; resp[0] = MODE_PAGE_INFO_EXCEP; resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD; resp[2] = 0x88; /* All other bytes are zero */ - return res; + return 0; } static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *resp, int len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u16 mode_pages_offset_1 = 0; u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4; @@ -1353,23 +1077,18 @@ static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1], MODE_PAGE_CACHING_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; + if (res) + return res; res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2], MODE_PAGE_CONTROL_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; + if (res) + return res; res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3], MODE_PAGE_POW_CND_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; - res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4], + if (res) + return res; + return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4], MODE_PAGE_INF_EXC_LEN); - if (res != SNTI_TRANSLATION_SUCCESS) - goto out; - - out: - return res; } static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa) @@ -1390,7 +1109,7 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *, int), u16 mode_pages_tot_len) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int xfer_len; u8 *response; u8 dbd, llbaa; @@ -1399,9 +1118,10 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, u16 mode_pages_offset_1; u16 blk_desc_len, blk_desc_offset, mode_data_length; - dbd = GET_MODE_SENSE_DBD(cmd); - llbaa = GET_MODE_SENSE_LLBAA(cmd); - mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10); + dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT; + llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT; + mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE; + blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa); resp_size = mph_size + blk_desc_len + mode_pages_tot_len; @@ -1419,18 +1139,18 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, llbaa, mode_data_length, blk_desc_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_free; if (blk_desc_len > 0) { res = nvme_trans_fill_blk_desc(ns, hdr, &response[blk_desc_offset], blk_desc_len, llbaa); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_free; } res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1], mode_pages_tot_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_free; xfer_len = min(alloc_len, resp_size); @@ -1485,33 +1205,20 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns, static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 pc, u8 pcmod, u8 start) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; int lowest_pow_st; /* max npss = lowest power consumption */ unsigned ps_desired = 0; - /* NVMe Controller Identify */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ctrl), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } - id_ctrl = mem; + return res; + lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); + kfree(id_ctrl); switch (pc) { case NVME_POWER_STATE_START_VALID: @@ -1551,79 +1258,48 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, } nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, NULL); - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out_dma; - if (nvme_sc) - res = nvme_sc; - out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, - dma_addr); - out: - return res; + return nvme_trans_status_code(hdr, nvme_sc); } -/* Write Buffer Helper Functions */ -/* Also using this for Format Unit with hdr passed as NULL, and buffer_id, 0 */ +static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 buffer_id) +{ + struct nvme_command c; + int nvme_sc; -static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_activate_fw; + c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV); + + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); +} + +static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 opcode, u32 tot_len, u32 offset, u8 buffer_id) { - int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_dev *dev = ns->dev; struct nvme_command c; - struct nvme_iod *iod = NULL; - unsigned length; - memset(&c, 0, sizeof(c)); - c.common.opcode = opcode; - if (opcode == nvme_admin_download_fw) { - if (hdr->iovec_count > 0) { - /* Assuming SGL is not allowed for this command */ - res = nvme_trans_completion(hdr, - SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, - SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out; - } - iod = nvme_map_user_pages(dev, DMA_TO_DEVICE, - (unsigned long)hdr->dxferp, tot_len); - if (IS_ERR(iod)) { - res = PTR_ERR(iod); - goto out; - } - length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL); - if (length != tot_len) { - res = -ENOMEM; - goto out_unmap; - } - - c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.dlfw.prp2 = cpu_to_le64(iod->first_dma); - c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); - c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); - } else if (opcode == nvme_admin_activate_fw) { - u32 cdw10 = buffer_id | NVME_FWACT_REPL_ACTV; - c.common.cdw10[0] = cpu_to_le32(cdw10); + if (hdr->iovec_count > 0) { + /* Assuming SGL is not allowed for this command */ + return nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } - nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL); - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out_unmap; - if (nvme_sc) - res = nvme_sc; - - out_unmap: - if (opcode == nvme_admin_download_fw) { - nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod); - nvme_free_iod(dev, iod); - } - out: - return res; + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_admin_download_fw; + c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); + c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); + + nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, + hdr->dxferp, tot_len, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); } /* Mode Select Helper Functions */ @@ -1686,7 +1362,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list, static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *mode_page, u8 page_code) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; unsigned dword11; @@ -1697,12 +1373,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11, 0, NULL); res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - break; - if (nvme_sc) { - res = nvme_sc; - break; - } break; case MODE_PAGE_CONTROL: break; @@ -1714,8 +1384,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - if (!res) - res = SNTI_INTERNAL_ERROR; break; } break; @@ -1723,8 +1391,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - if (!res) - res = SNTI_INTERNAL_ERROR; break; } @@ -1735,7 +1401,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd, u16 parm_list_len, u8 pf, u8 sp, u8 cdb10) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 *parm_list; u16 bd_len; u8 llbaa = 0; @@ -1751,7 +1417,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, } res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_mem; nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa); @@ -1789,7 +1455,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, mp_size = parm_list[index + 1] + 2; res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index], page_code); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) break; index += mp_size; } while (index < parm_list_len); @@ -1805,12 +1471,9 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, struct sg_io_hdr *hdr) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; - struct nvme_id_ns *id_ns; u8 flbas; /* @@ -1821,22 +1484,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, */ if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + struct nvme_id_ns *id_ns; + + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } - id_ns = mem; + return res; if (ns->mode_select_num_blocks == 0) ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap); @@ -1845,18 +1498,17 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, ns->mode_select_block_len = (1 << (id_ns->lbaf[flbas].ds)); } - out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - mem, dma_addr); + + kfree(id_ns); } - out: - return res; + + return 0; } static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, u8 format_prot_info, u8 *nvme_pf_code) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 *parm_list; u8 pf_usage, pf_code; @@ -1866,7 +1518,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, goto out; } res = nvme_trans_copy_from_user(hdr, parm_list, len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out_mem; if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] & @@ -1916,11 +1568,9 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len, static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 prot_info) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 i; u8 flbas, nlbaf; @@ -1929,22 +1579,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } - id_ns = mem; + return res; + flbas = (id_ns->flbas) & 0x0F; nlbaf = id_ns->nlbaf; @@ -1972,69 +1611,13 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.format.nsid = cpu_to_le32(ns->ns_id); c.format.cdw10 = cpu_to_le32(cdw10); - nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out_dma; - if (nvme_sc) - res = nvme_sc; - out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); - out: + kfree(id_ns); return res; } -/* Read/Write Helper Functions */ - -static inline void nvme_trans_get_io_cdb6(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = 0; - cdb_info->prot_info = 0; - cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) & - IO_6_CDB_LBA_MASK; - cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET); - - /* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */ - if (cdb_info->xfer_len == 0) - cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN; -} - -static inline void nvme_trans_get_io_cdb10(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) & - IO_CDB_FUA_MASK; - cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) & - IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET); - cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET); -} - -static inline void nvme_trans_get_io_cdb12(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) & - IO_CDB_FUA_MASK; - cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) & - IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET); - cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET); -} - -static inline void nvme_trans_get_io_cdb16(u8 *cmd, - struct nvme_trans_io_cdb *cdb_info) -{ - cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) & - IO_CDB_FUA_MASK; - cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) & - IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT; - cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET); - cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET); -} - static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr, struct nvme_trans_io_cdb *cdb_info, u32 max_blocks) @@ -2064,11 +1647,8 @@ static u16 nvme_trans_io_get_control(struct nvme_ns *ns, static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_trans_io_cdb *cdb_info, u8 is_write) { - int res = SNTI_TRANSLATION_SUCCESS; - int nvme_sc; - struct nvme_dev *dev = ns->dev; + int nvme_sc = NVME_SC_SUCCESS; u32 num_cmds; - struct nvme_iod *iod; u64 unit_len; u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */ u32 retcode; @@ -2119,45 +1699,20 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, control = nvme_trans_io_get_control(ns, cdb_info); c.rw.control = cpu_to_le16(control); - iod = nvme_map_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - (unsigned long)next_mapping_addr, unit_len); - if (IS_ERR(iod)) { - res = PTR_ERR(iod); - goto out; - } - retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL); - if (retcode != unit_len) { - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - res = -ENOMEM; - goto out; + if (get_capacity(ns->disk) - unit_num_blocks < + cdb_info->lba + nvme_offset) { + nvme_sc = NVME_SC_LBA_RANGE; + break; } - c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); - c.rw.prp2 = cpu_to_le64(iod->first_dma); + nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL, + next_mapping_addr, unit_len, NULL, 0); + if (nvme_sc) + break; nvme_offset += unit_num_blocks; - - nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); - if (nvme_sc != NVME_SC_SUCCESS) { - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); - res = nvme_trans_status_code(hdr, nvme_sc); - goto out; - } - nvme_unmap_user_pages(dev, - (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - iod); - nvme_free_iod(dev, iod); } - res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS); - out: - return res; + return nvme_trans_status_code(hdr, nvme_sc); } @@ -2166,8 +1721,8 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; - struct nvme_trans_io_cdb cdb_info; + int res = 0; + struct nvme_trans_io_cdb cdb_info = { 0, }; u8 opcode = cmd[0]; u64 xfer_bytes; u64 sum_iov_len = 0; @@ -2175,27 +1730,52 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, int i; size_t not_copied; - /* Extract Fields from CDB */ + /* + * The FUA and WPROTECT fields are not supported in 6-byte CDBs, + * but always in the same place for all others. + */ + switch (opcode) { + case WRITE_6: + case READ_6: + break; + default: + cdb_info.fua = cmd[1] & 0x8; + cdb_info.prot_info = (cmd[1] & 0xe0) >> 5; + if (cdb_info.prot_info && !ns->pi_type) { + return nvme_trans_completion(hdr, + SAM_STAT_CHECK_CONDITION, + ILLEGAL_REQUEST, + SCSI_ASC_INVALID_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + } + } + switch (opcode) { case WRITE_6: case READ_6: - nvme_trans_get_io_cdb6(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be24(&cmd[1]); + cdb_info.xfer_len = cmd[4]; + if (cdb_info.xfer_len == 0) + cdb_info.xfer_len = 256; break; case WRITE_10: case READ_10: - nvme_trans_get_io_cdb10(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be32(&cmd[2]); + cdb_info.xfer_len = get_unaligned_be16(&cmd[7]); break; case WRITE_12: case READ_12: - nvme_trans_get_io_cdb12(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be32(&cmd[2]); + cdb_info.xfer_len = get_unaligned_be32(&cmd[6]); break; case WRITE_16: case READ_16: - nvme_trans_get_io_cdb16(cmd, &cdb_info); + cdb_info.lba = get_unaligned_be64(&cmd[2]); + cdb_info.xfer_len = get_unaligned_be32(&cmd[10]); break; default: /* Will never really reach here */ - res = SNTI_INTERNAL_ERROR; + res = -EIO; goto out; } @@ -2237,7 +1817,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, /* Send NVMe IO Command(s) */ res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; out: @@ -2247,15 +1827,15 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write, static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; u8 evpd; u8 page_code; int alloc_len; u8 *inq_response; - evpd = GET_INQ_EVPD_BIT(cmd); - page_code = GET_INQ_PAGE_CODE(cmd); - alloc_len = GET_INQ_ALLOC_LENGTH(cmd); + evpd = cmd[1] & 0x01; + page_code = cmd[2]; + alloc_len = get_unaligned_be16(&cmd[3]); inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH), GFP_KERNEL); @@ -2316,29 +1896,27 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u16 alloc_len; - u8 sp; u8 pc; u8 page_code; - sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET); - if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) { + if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET); - page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK; - pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT; + + page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK; + pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT; if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET); + alloc_len = get_unaligned_be16(&cmd[7]); switch (page_code) { case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE: res = nvme_trans_log_supp_pages(ns, hdr, alloc_len); @@ -2363,24 +1941,18 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; u8 cdb10 = 0; u16 parm_list_len; u8 page_format; u8 save_pages; - page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET); - page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK; + page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK; + save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK; - save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET); - save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK; - - if (GET_OPCODE(cmd) == MODE_SELECT) { - parm_list_len = GET_U8_FROM_CDB(cmd, - MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET); + if (cmd[0] == MODE_SELECT) { + parm_list_len = cmd[4]; } else { - parm_list_len = GET_U16_FROM_CDB(cmd, - MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET); + parm_list_len = cmd[7]; cdb10 = 1; } @@ -2389,42 +1961,36 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr, * According to SPC-4 r24, a paramter list length field of 0 * shall not be considered an error */ - res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len, + return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len, page_format, save_pages, cdb10); } - return res; + return 0; } static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; u16 alloc_len; u8 cdb10 = 0; - u8 page_code; - u8 pc; - if (GET_OPCODE(cmd) == MODE_SENSE) { - alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET); + if (cmd[0] == MODE_SENSE) { + alloc_len = cmd[4]; } else { - alloc_len = GET_U16_FROM_CDB(cmd, - MODE_SENSE10_ALLOC_LEN_OFFSET); + alloc_len = get_unaligned_be16(&cmd[7]); cdb10 = 1; } - pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) & - MODE_SENSE_PAGE_CONTROL_MASK; - if (pc != MODE_SENSE_PC_CURRENT_VALUES) { + if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) != + MODE_SENSE_PC_CURRENT_VALUES) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) & - MODE_SENSE_PAGE_CODE_MASK; - switch (page_code) { + switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) { case MODE_PAGE_CACHING: res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len, cdb10, @@ -2467,47 +2033,34 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, } static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) + u8 *cmd, u8 cdb16) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; - u32 alloc_len = READ_CAP_10_RESP_SIZE; - u32 resp_size = READ_CAP_10_RESP_SIZE; + u32 alloc_len; + u32 resp_size; u32 xfer_len; - u8 cdb16; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ns *id_ns; u8 *response; - cdb16 = IS_READ_CAP_16(cmd); if (cdb16) { - alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd); + alloc_len = get_unaligned_be32(&cmd[10]); resp_size = READ_CAP_16_RESP_SIZE; + } else { + alloc_len = READ_CAP_10_RESP_SIZE; + resp_size = READ_CAP_10_RESP_SIZE; } - mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - /* nvme ns identify */ - nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr); + nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } - id_ns = mem; + return res; response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; - goto out_dma; + goto out_free_id; } nvme_trans_fill_read_cap(response, id_ns, cdb16); @@ -2515,72 +2068,53 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, response, xfer_len); kfree(response); - out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem, - dma_addr); - out: + out_free_id: + kfree(id_ns); return res; } static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; int nvme_sc; u32 alloc_len, xfer_len, resp_size; - u8 select_report; u8 *response; struct nvme_dev *dev = ns->dev; - dma_addr_t dma_addr; - void *mem; struct nvme_id_ctrl *id_ctrl; u32 ll_length, lun_id; u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; __be32 tmp_len; - alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd); - select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET); - - if ((select_report != ALL_LUNS_RETURNED) && - (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) && - (select_report != RESTRICTED_LUNS_RETURNED)) { - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + switch (cmd[2]) { + default: + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out; - } else { - /* NVMe Controller Identify */ - mem = dma_alloc_coherent(&dev->pci_dev->dev, - sizeof(struct nvme_id_ctrl), - &dma_addr, GFP_KERNEL); - if (mem == NULL) { - res = -ENOMEM; - goto out; - } - nvme_sc = nvme_identify(dev, 0, 1, dma_addr); + case ALL_LUNS_RETURNED: + case ALL_WELL_KNOWN_LUNS_RETURNED: + case RESTRICTED_LUNS_RETURNED: + nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); res = nvme_trans_status_code(hdr, nvme_sc); if (res) - goto out_dma; - if (nvme_sc) { - res = nvme_sc; - goto out_dma; - } - id_ctrl = mem; + return res; + ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE; resp_size = ll_length + LUN_DATA_HEADER_SIZE; + alloc_len = get_unaligned_be32(&cmd[6]); if (alloc_len < resp_size) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - goto out_dma; + goto out_free_id; } response = kzalloc(resp_size, GFP_KERNEL); if (response == NULL) { res = -ENOMEM; - goto out_dma; + goto out_free_id; } /* The first LUN ID will always be 0 per the SAM spec */ @@ -2601,24 +2135,21 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, res = nvme_trans_copy_to_user(hdr, response, xfer_len); kfree(response); - out_dma: - dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem, - dma_addr); - out: + out_free_id: + kfree(id_ctrl); return res; } static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 alloc_len, xfer_len, resp_size; u8 desc_format; u8 *response; - alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd); - desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET); - desc_format &= REQUEST_SENSE_DESC_MASK; + desc_format = cmd[1] & 0x01; + alloc_len = cmd[4]; resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : (FIXED_FMT_SENSE_DATA_SIZE)); @@ -2628,7 +2159,7 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { + if (desc_format) { /* Descriptor Format Sense Data */ response[0] = DESC_FORMAT_SENSE_DATA; response[1] = NO_SENSE; @@ -2667,95 +2198,58 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } -static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 *cmd) +static int nvme_trans_synchronize_cache(struct nvme_ns *ns, + struct sg_io_hdr *hdr) { - int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_command c; - u8 immed, pcmod, pc, no_flush, start; - immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET); - pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET); - pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET); - no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET); - start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET); + memset(&c, 0, sizeof(c)); + c.common.opcode = nvme_cmd_flush; + c.common.nsid = cpu_to_le32(ns->ns_id); - immed &= START_STOP_UNIT_CDB_IMMED_MASK; - pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK; - pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT; - no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK; - start &= START_STOP_UNIT_CDB_START_MASK; + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0); + return nvme_trans_status_code(hdr, nvme_sc); +} + +static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, + u8 *cmd) +{ + u8 immed, pcmod, pc, no_flush, start; + + immed = cmd[1] & 0x01; + pcmod = cmd[3] & 0x0f; + pc = (cmd[4] & 0xf0) >> 4; + no_flush = cmd[4] & 0x04; + start = cmd[4] & 0x01; if (immed != 0) { - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); } else { if (no_flush == 0) { /* Issue NVME FLUSH command prior to START STOP UNIT */ - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_cmd_flush; - c.common.nsid = cpu_to_le32(ns->ns_id); - - nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); - res = nvme_trans_status_code(hdr, nvme_sc); + int res = nvme_trans_synchronize_cache(ns, hdr); if (res) - goto out; - if (nvme_sc) { - res = nvme_sc; - goto out; - } + return res; } /* Setup the expected power state transition */ - res = nvme_trans_power_state(ns, hdr, pc, pcmod, start); + return nvme_trans_power_state(ns, hdr, pc, pcmod, start); } - - out: - return res; -} - -static int nvme_trans_synchronize_cache(struct nvme_ns *ns, - struct sg_io_hdr *hdr, u8 *cmd) -{ - int res = SNTI_TRANSLATION_SUCCESS; - int nvme_sc; - struct nvme_command c; - - memset(&c, 0, sizeof(c)); - c.common.opcode = nvme_cmd_flush; - c.common.nsid = cpu_to_le32(ns->ns_id); - - nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL); - - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - goto out; - if (nvme_sc) - res = nvme_sc; - - out: - return res; } static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res; u8 parm_hdr_len = 0; u8 nvme_pf_code = 0; u8 format_prot_info, long_list, format_data; - format_prot_info = GET_U8_FROM_CDB(cmd, - FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET); - long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET); - format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET); - - format_prot_info = (format_prot_info & - FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >> - FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT; - long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK; - format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK; + format_prot_info = (cmd[1] & 0xc0) >> 6; + long_list = cmd[1] & 0x20; + format_data = cmd[1] & 0x10; if (format_data != 0) { if (format_prot_info != 0) { @@ -2779,16 +2273,16 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (parm_hdr_len > 0) { res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len, format_prot_info, &nvme_pf_code); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; } /* Attempt to activate any previously downloaded firmware image */ - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0); + res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0); /* Determine Block size and count and send format command */ res = nvme_trans_fmt_set_blk_size_count(ns, hdr); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code); @@ -2801,28 +2295,24 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; struct nvme_dev *dev = ns->dev; if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, + return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, NOT_READY, SCSI_ASC_LUN_NOT_READY, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); else - res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0); - - return res; + return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0); } static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - int res = SNTI_TRANSLATION_SUCCESS; + int res = 0; u32 buffer_offset, parm_list_length; u8 buffer_id, mode; - parm_list_length = - GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET); + parm_list_length = get_unaligned_be24(&cmd[6]); if (parm_list_length % BYTES_TO_DWORDS != 0) { /* NVMe expects Firmware file to be a whole number of DWORDS */ res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, @@ -2830,38 +2320,32 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET); + buffer_id = cmd[2]; if (buffer_id > NVME_MAX_FIRMWARE_SLOT) { res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, SCSI_ASCQ_CAUSE_NOT_REPORTABLE); goto out; } - mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) & - WRITE_BUFFER_CDB_MODE_MASK; - buffer_offset = - GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET); + mode = cmd[1] & 0x1f; + buffer_offset = get_unaligned_be24(&cmd[3]); switch (mode) { case DOWNLOAD_SAVE_ACTIVATE: - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw, parm_list_length, buffer_offset, buffer_id); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, - parm_list_length, buffer_offset, - buffer_id); + res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id); break; case DOWNLOAD_SAVE_DEFER_ACTIVATE: - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw, + res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw, parm_list_length, buffer_offset, buffer_id); break; case ACTIVATE_DEFERRED_MICROCODE: - res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, - parm_list_length, buffer_offset, - buffer_id); + res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id); break; default: res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, @@ -2890,15 +2374,13 @@ struct scsi_unmap_parm_list { static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - struct nvme_dev *dev = ns->dev; struct scsi_unmap_parm_list *plist; struct nvme_dsm_range *range; struct nvme_command c; - int i, nvme_sc, res = -ENOMEM; + int i, nvme_sc, res; u16 ndesc, list_len; - dma_addr_t dma_addr; - list_len = GET_U16_FROM_CDB(cmd, UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET); + list_len = get_unaligned_be16(&cmd[7]); if (!list_len) return -EINVAL; @@ -2907,7 +2389,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, return -ENOMEM; res = nvme_trans_copy_from_user(hdr, plist, list_len); - if (res != SNTI_TRANSLATION_SUCCESS) + if (res) goto out; ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4; @@ -2916,10 +2398,11 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out; } - range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), - &dma_addr, GFP_KERNEL); - if (!range) + range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL); + if (!range) { + res = -ENOMEM; goto out; + } for (i = 0; i < ndesc; i++) { range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb)); @@ -2930,15 +2413,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, memset(&c, 0, sizeof(c)); c.dsm.opcode = nvme_cmd_dsm; c.dsm.nsid = cpu_to_le32(ns->ns_id); - c.dsm.prp1 = cpu_to_le64(dma_addr); c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL); + nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range, + ndesc * sizeof(*range)); res = nvme_trans_status_code(hdr, nvme_sc); - dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), - range, dma_addr); + kfree(range); out: kfree(plist); return res; @@ -2993,13 +2475,16 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) retcode = nvme_trans_mode_sense(ns, hdr, cmd); break; case READ_CAPACITY: - retcode = nvme_trans_read_capacity(ns, hdr, cmd); + retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0); break; case SERVICE_ACTION_IN_16: - if (IS_READ_CAP_16(cmd)) - retcode = nvme_trans_read_capacity(ns, hdr, cmd); - else + switch (cmd[1]) { + case SAI_READ_CAPACITY_16: + retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1); + break; + default: goto out; + } break; case REPORT_LUNS: retcode = nvme_trans_report_luns(ns, hdr, cmd); @@ -3015,7 +2500,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) retcode = nvme_trans_start_stop(ns, hdr, cmd); break; case SYNCHRONIZE_CACHE: - retcode = nvme_trans_synchronize_cache(ns, hdr, cmd); + retcode = nvme_trans_synchronize_cache(ns, hdr); break; case FORMAT_UNIT: retcode = nvme_trans_format_unit(ns, hdr, cmd); @@ -3053,15 +2538,16 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) if (hdr.cmd_len > BLK_MAX_CDB) return -EINVAL; + /* + * A positive return code means a NVMe status, which has been + * translated to sense data. + */ retcode = nvme_scsi_translate(ns, &hdr); if (retcode < 0) return retcode; - if (retcode > 0) - retcode = SNTI_TRANSLATION_SUCCESS; if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0) return -EFAULT; - - return retcode; + return 0; } int nvme_sg_get_version_num(int __user *ip) diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index ef45cfb98fd2..b1612eb16172 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -1,5 +1,5 @@ /* - * ps3vram - Use extra PS3 video ram as MTD block device. + * ps3vram - Use extra PS3 video ram as block device. * * Copyright 2009 Sony Corporation * @@ -73,8 +73,8 @@ struct ps3vram_priv { u64 memory_handle; u64 context_handle; - u32 *ctrl; - void *reports; + u32 __iomem *ctrl; + void __iomem *reports; u8 *xdr_buf; u32 *fifo_base; @@ -104,7 +104,7 @@ static char *size = "256M"; module_param(size, charp, 0); MODULE_PARM_DESC(size, "memory size"); -static u32 *ps3vram_get_notifier(void *reports, int notifier) +static u32 __iomem *ps3vram_get_notifier(void __iomem *reports, int notifier) { return reports + DMA_NOTIFIER_OFFSET_BASE + DMA_NOTIFIER_SIZE * notifier; @@ -113,22 +113,22 @@ static u32 *ps3vram_get_notifier(void *reports, int notifier) static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); - u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); + u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); int i; for (i = 0; i < 4; i++) - notify[i] = 0xffffffff; + iowrite32be(0xffffffff, notify + i); } static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev, unsigned int timeout_ms) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); - u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); + u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER); unsigned long timeout; for (timeout = 20; timeout; timeout--) { - if (!notify[3]) + if (!ioread32be(notify + 3)) return 0; udelay(10); } @@ -136,7 +136,7 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev, timeout = jiffies + msecs_to_jiffies(timeout_ms); do { - if (!notify[3]) + if (!ioread32be(notify + 3)) return 0; msleep(1); } while (time_before(jiffies, timeout)); @@ -148,8 +148,8 @@ static void ps3vram_init_ring(struct ps3_system_bus_device *dev) { struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); - priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET; - priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET; + iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT); + iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_GET); } static int ps3vram_wait_ring(struct ps3_system_bus_device *dev, @@ -159,14 +159,14 @@ static int ps3vram_wait_ring(struct ps3_system_bus_device *dev, unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); do { - if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET]) + if (ioread32be(priv->ctrl + CTRL_PUT) == ioread32be(priv->ctrl + CTRL_GET)) return 0; msleep(1); } while (time_before(jiffies, timeout)); dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n", - priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET], - priv->ctrl[CTRL_TOP]); + ioread32be(priv->ctrl + CTRL_PUT), ioread32be(priv->ctrl + CTRL_GET), + ioread32be(priv->ctrl + CTRL_TOP)); return -ETIMEDOUT; } @@ -189,7 +189,7 @@ static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev) ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET)); - priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET; + iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT); /* asking the HV for a blit will kick the FIFO */ status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0); @@ -207,8 +207,8 @@ static void ps3vram_fire_ring(struct ps3_system_bus_device *dev) mutex_lock(&ps3_gpu_mutex); - priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET + - (priv->fifo_ptr - priv->fifo_base) * sizeof(u32); + iowrite32be(FIFO_BASE + FIFO_OFFSET + (priv->fifo_ptr - priv->fifo_base) + * sizeof(u32), priv->ctrl + CTRL_PUT); /* asking the HV for a blit will kick the FIFO */ status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0); diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 90f39f79f5d7..ef1d9fb06cab 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -99,9 +99,8 @@ void dasd_gendisk_free(struct dasd_block *block) int dasd_scan_partitions(struct dasd_block *block) { struct block_device *bdev; - int retry, rc; + int rc; - retry = 5; bdev = bdget_disk(block->gdp, 0); if (!bdev) { DBF_DEV_EVENT(DBF_ERR, block->base, "%s", @@ -116,19 +115,11 @@ int dasd_scan_partitions(struct dasd_block *block) rc); return -ENODEV; } - /* - * See fs/partition/check.c:register_disk,rescan_partitions - * Can't call rescan_partitions directly. Use ioctl. - */ - rc = ioctl_by_bdev(bdev, BLKRRPART, 0); - while (rc == -EBUSY && retry > 0) { - schedule(); - rc = ioctl_by_bdev(bdev, BLKRRPART, 0); - retry--; + + rc = blkdev_reread_part(bdev); + if (rc) DBF_DEV_EVENT(DBF_ERR, block->base, - "scan partitions error, retry %d rc %d", - retry, rc); - } + "scan partitions error, rc %d", rc); /* * Since the matching blkdev_put call to the blkdev_get in diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8dbd05e70f09..c0d94ed8ce9a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -74,7 +74,7 @@ struct nvme_dev { struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; - struct pci_dev *pci_dev; + struct device *dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; int instance; @@ -92,6 +92,7 @@ struct nvme_dev { work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; + struct work_struct scan_work; char name[12]; char serial[20]; char model[40]; @@ -146,25 +147,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) return (sector >> (ns->lba_shift - 9)); } -/** - * nvme_free_iod - frees an nvme_iod - * @dev: The device that the I/O was submitted to - * @iod: The memory to free - */ -void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); - -int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t); -struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, - unsigned long addr, unsigned length); -void nvme_unmap_user_pages(struct nvme_dev *dev, int write, - struct nvme_iod *iod); -int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *, - struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); -int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, - u32 *result); -int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, - dma_addr_t dma_addr); +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result); int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index aef9a81b2d75..732b32e92b02 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -179,6 +179,10 @@ enum { NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, }; +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; @@ -579,5 +583,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) #endif /* _UAPI_LINUX_NVME_H */ |