diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 01:13:31 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 01:13:31 +0100 |
commit | 237045fc3c67d44088f767dca5a9fa30815eba62 (patch) | |
tree | 50773de6320e4b3358e3935c2f3c6f93d0dc93f3 | |
parent | Merge branch 'for-4.6/core' of git://git.kernel.dk/linux-block (diff) | |
parent | NVMe: Expose ns wwid through single sysfs entry (diff) | |
download | linux-237045fc3c67d44088f767dca5a9fa30815eba62.tar.xz linux-237045fc3c67d44088f767dca5a9fa30815eba62.zip |
Merge branch 'for-4.6/drivers' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"This is the block driver pull request for this merge window. It sits
on top of for-4.6/core, that was just sent out.
This contains:
- A set of fixes for lightnvm. One from Alan, fixing an overflow,
and the rest from the usual suspects, Javier and Matias.
- A set of fixes for nbd from Markus and Dan, and a fixup from Arnd
for correct usage of the signed 64-bit divider.
- A set of bug fixes for the Micron mtip32xx, from Asai.
- A fix for the brd discard handling from Bart.
- Update the maintainers entry for cciss, since that hardware has
transferred ownership.
- Three bug fixes for bcache from Eric Wheeler.
- Set of fixes for xen-blk{back,front} from Jan and Konrad.
- Removal of the cpqarray driver. It has been disabled in Kconfig
since 2013, and we were initially scheduled to remove it in 3.15.
- Various updates and fixes for NVMe, with the most important being:
- Removal of the per-device NVMe thread, replacing that with a
watchdog timer instead. From Christoph.
- Exposing the namespace WWID through sysfs, from Keith.
- Set of cleanups from Ming Lin.
- Logging the controller device name instead of the underlying
PCI device name, from Sagi.
- And a bunch of fixes and optimizations from the usual suspects
in this area"
* 'for-4.6/drivers' of git://git.kernel.dk/linux-block: (49 commits)
NVMe: Expose ns wwid through single sysfs entry
drivers:block: cpqarray clean up
brd: Fix discard request processing
cpqarray: remove it from the kernel
cciss: update MAINTAINERS
NVMe: Remove unused sq_head read in completion path
bcache: fix cache_set_flush() NULL pointer dereference on OOM
bcache: cleaned up error handling around register_cache()
bcache: fix race of writeback thread starting before complete initialization
NVMe: Create discard zero quirk white list
nbd: use correct div_s64 helper
mtip32xx: remove unneeded variable in mtip_cmd_timeout()
lightnvm: generalize rrpc ppa calculations
lightnvm: remove struct nvm_dev->total_blocks
lightnvm: rename ->nr_pages to ->nr_sects
lightnvm: update closed list outside of intr context
xen/blback: Fit the important information of the thread in 17 characters
lightnvm: fold get bb tbl when using dual/quad plane mode
lightnvm: fix up nonsensical configure overrun checking
xen-blkback: advertise indirect segment support earlier
...
-rw-r--r-- | Documentation/blockdev/cpqarray.txt | 93 | ||||
-rw-r--r-- | MAINTAINERS | 10 | ||||
-rw-r--r-- | drivers/block/Kconfig | 10 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/brd.c | 2 | ||||
-rw-r--r-- | drivers/block/cpqarray.c | 1820 | ||||
-rw-r--r-- | drivers/block/cpqarray.h | 126 | ||||
-rw-r--r-- | drivers/block/ida_cmd.h | 349 | ||||
-rw-r--r-- | drivers/block/ida_ioctl.h | 87 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 266 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.h | 11 | ||||
-rw-r--r-- | drivers/block/nbd.c | 334 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 20 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 6 | ||||
-rw-r--r-- | drivers/lightnvm/core.c | 19 | ||||
-rw-r--r-- | drivers/lightnvm/gennvm.c | 7 | ||||
-rw-r--r-- | drivers/lightnvm/rrpc.c | 98 | ||||
-rw-r--r-- | drivers/lightnvm/rrpc.h | 15 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 46 | ||||
-rw-r--r-- | drivers/nvme/host/Kconfig | 6 | ||||
-rw-r--r-- | drivers/nvme/host/Makefile | 10 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 166 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 46 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 14 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 235 | ||||
-rw-r--r-- | include/linux/lightnvm.h | 8 |
26 files changed, 794 insertions, 3011 deletions
diff --git a/Documentation/blockdev/cpqarray.txt b/Documentation/blockdev/cpqarray.txt deleted file mode 100644 index c7154e20ef5e..000000000000 --- a/Documentation/blockdev/cpqarray.txt +++ /dev/null @@ -1,93 +0,0 @@ -This driver is for Compaq's SMART2 Intelligent Disk Array Controllers. - -Supported Cards: ----------------- - -This driver is known to work with the following cards: - - * SMART (EISA) - * SMART-2/E (EISA) - * SMART-2/P - * SMART-2DH - * SMART-2SL - * SMART-221 - * SMART-3100ES - * SMART-3200 - * Integrated Smart Array Controller - * SA 4200 - * SA 4250ES - * SA 431 - * RAID LC2 Controller - -It should also work with some really old Disk array adapters, but I am -unable to test against these cards: - - * IDA - * IDA-2 - * IAES - - -EISA Controllers: ------------------ - -If you want to use an EISA controller you'll have to supply some -modprobe/lilo parameters. If the driver is compiled into the kernel, must -give it the controller's IO port address at boot time (it is not -necessary to specify the IRQ). For example, if you had two SMART-2/E -controllers, in EISA slots 1 and 2 you'd give it a boot argument like -this: - - smart2=0x1000,0x2000 - -If you were loading the driver as a module, you'd give load it like this: - - modprobe cpqarray eisa=0x1000,0x2000 - -You can use EISA and PCI adapters at the same time. - - -Device Naming: --------------- - -You need some entries in /dev for the ida device. MAKEDEV in the /dev -directory can make device nodes for you automatically. The device setup is -as follows: - -Major numbers: - 72 ida0 - 73 ida1 - 74 ida2 - 75 ida3 - 76 ida4 - 77 ida5 - 78 ida6 - 79 ida7 - -Minor numbers: - b7 b6 b5 b4 b3 b2 b1 b0 - |----+----| |----+----| - | | - | +-------- Partition ID (0=wholedev, 1-15 partition) - | - +-------------------- Logical Volume number - -The device naming scheme is: -/dev/ida/c0d0 Controller 0, disk 0, whole device -/dev/ida/c0d0p1 Controller 0, disk 0, partition 1 -/dev/ida/c0d0p2 Controller 0, disk 0, partition 2 -/dev/ida/c0d0p3 Controller 0, disk 0, partition 3 - -/dev/ida/c1d1 Controller 1, disk 1, whole device -/dev/ida/c1d1p1 Controller 1, disk 1, partition 1 -/dev/ida/c1d1p2 Controller 1, disk 1, partition 2 -/dev/ida/c1d1p3 Controller 1, disk 1, partition 3 - - -Changelog: -========== - -10-28-2004 : General cleanup, syntax fixes for in-kernel driver version. - James Nelson <james4765@gmail.com> - - -1999 : Original Document diff --git a/MAINTAINERS b/MAINTAINERS index 15b4c417211f..1c6d7781812e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5016,12 +5016,6 @@ T: git git://linuxtv.org/anttip/media_tree.git S: Maintained F: drivers/media/dvb-frontends/hd29l2* -HEWLETT-PACKARD SMART2 RAID DRIVER -L: iss_storagedev@hp.com -S: Orphan -F: Documentation/blockdev/cpqarray.txt -F: drivers/block/cpqarray.* - HEWLETT-PACKARD SMART ARRAY RAID DRIVER (hpsa) M: Don Brace <don.brace@microsemi.com> L: iss_storagedev@hp.com @@ -5034,9 +5028,9 @@ F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h HEWLETT-PACKARD SMART CISS RAID DRIVER (cciss) -M: Don Brace <don.brace@pmcs.com> +M: Don Brace <don.brace@microsemi.com> L: iss_storagedev@hp.com -L: storagedev@pmcs.com +L: esc.storagedev@microsemi.com L: linux-scsi@vger.kernel.org S: Supported F: Documentation/blockdev/cciss.txt diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 29819e719afa..39dd30b6ef86 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -110,16 +110,6 @@ source "drivers/block/mtip32xx/Kconfig" source "drivers/block/zram/Kconfig" -config BLK_CPQ_DA - tristate "Compaq SMART2 support" - depends on PCI && VIRT_TO_BUS && 0 - help - This is the driver for Compaq Smart Array controllers. Everyone - using these boards should say Y here. See the file - <file:Documentation/blockdev/cpqarray.txt> for the current list of - boards supported by this driver, and for further information on the - use of this driver. - config BLK_CPQ_CISS_DA tristate "Compaq Smart Array 5xxx support" depends on PCI diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 671329023ec2..1e9661e26f29 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o obj-$(CONFIG_XILINX_SYSACE) += xsysace.o diff --git a/drivers/block/brd.c b/drivers/block/brd.c index cb27190e9f39..f7ecc287d733 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -341,7 +341,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) if (unlikely(bio->bi_rw & REQ_DISCARD)) { if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) || - bio->bi_iter.bi_size & PAGE_MASK) + bio->bi_iter.bi_size & ~PAGE_MASK) goto io_error; discard_from_brd(brd, sector, bio->bi_iter.bi_size); goto out; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c deleted file mode 100644 index f749df9e15cd..000000000000 --- a/drivers/block/cpqarray.c +++ /dev/null @@ -1,1820 +0,0 @@ -/* - * Disk Array driver for Compaq SMART2 Controllers - * Copyright 1998 Compaq Computer Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * - */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/bio.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/delay.h> -#include <linux/major.h> -#include <linux/fs.h> -#include <linux/blkpg.h> -#include <linux/timer.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/init.h> -#include <linux/hdreg.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> -#include <linux/blkdev.h> -#include <linux/genhd.h> -#include <linux/scatterlist.h> -#include <asm/uaccess.h> -#include <asm/io.h> - - -#define SMART2_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin)) - -#define DRIVER_NAME "Compaq SMART2 Driver (v 2.6.0)" -#define DRIVER_VERSION SMART2_DRIVER_VERSION(2,6,0) - -/* Embedded module documentation macros - see modules.h */ -/* Original author Chris Frantz - Compaq Computer Corporation */ -MODULE_AUTHOR("Compaq Computer Corporation"); -MODULE_DESCRIPTION("Driver for Compaq Smart2 Array Controllers version 2.6.0"); -MODULE_LICENSE("GPL"); - -#include "cpqarray.h" -#include "ida_cmd.h" -#include "smart1,2.h" -#include "ida_ioctl.h" - -#define READ_AHEAD 128 -#define NR_CMDS 128 /* This could probably go as high as ~400 */ - -#define MAX_CTLR 8 -#define CTLR_SHIFT 8 - -#define CPQARRAY_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ - -static DEFINE_MUTEX(cpqarray_mutex); -static int nr_ctlr; -static ctlr_info_t *hba[MAX_CTLR]; - -static int eisa[8]; - -#define NR_PRODUCTS ARRAY_SIZE(products) - -/* board_id = Subsystem Device ID & Vendor ID - * product = Marketing Name for the board - * access = Address of the struct of function pointers - */ -static struct board_type products[] = { - { 0x0040110E, "IDA", &smart1_access }, - { 0x0140110E, "IDA-2", &smart1_access }, - { 0x1040110E, "IAES", &smart1_access }, - { 0x2040110E, "SMART", &smart1_access }, - { 0x3040110E, "SMART-2/E", &smart2e_access }, - { 0x40300E11, "SMART-2/P", &smart2_access }, - { 0x40310E11, "SMART-2SL", &smart2_access }, - { 0x40320E11, "Smart Array 3200", &smart2_access }, - { 0x40330E11, "Smart Array 3100ES", &smart2_access }, - { 0x40340E11, "Smart Array 221", &smart2_access }, - { 0x40400E11, "Integrated Array", &smart4_access }, - { 0x40480E11, "Compaq Raid LC2", &smart4_access }, - { 0x40500E11, "Smart Array 4200", &smart4_access }, - { 0x40510E11, "Smart Array 4250ES", &smart4_access }, - { 0x40580E11, "Smart Array 431", &smart4_access }, -}; - -/* define the PCI info for the PCI cards this driver can control */ -static const struct pci_device_id cpqarray_pci_device_id[] = -{ - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_COMPAQ_42XX, - 0x0E11, 0x4058, 0, 0, 0}, /* SA431 */ - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_COMPAQ_42XX, - 0x0E11, 0x4051, 0, 0, 0}, /* SA4250ES */ - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_COMPAQ_42XX, - 0x0E11, 0x4050, 0, 0, 0}, /* SA4200 */ - { PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C1510, - 0x0E11, 0x4048, 0, 0, 0}, /* LC2 */ - { PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C1510, - 0x0E11, 0x4040, 0, 0, 0}, /* Integrated Array */ - { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P, - 0x0E11, 0x4034, 0, 0, 0}, /* SA 221 */ - { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P, - 0x0E11, 0x4033, 0, 0, 0}, /* SA 3100ES*/ - { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P, - 0x0E11, 0x4032, 0, 0, 0}, /* SA 3200*/ - { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P, - 0x0E11, 0x4031, 0, 0, 0}, /* SA 2SL*/ - { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P, - 0x0E11, 0x4030, 0, 0, 0}, /* SA 2P */ - { 0 } -}; - -MODULE_DEVICE_TABLE(pci, cpqarray_pci_device_id); - -static struct gendisk *ida_gendisk[MAX_CTLR][NWD]; - -/* Debug... */ -#define DBG(s) do { s } while(0) -/* Debug (general info)... */ -#define DBGINFO(s) do { } while(0) -/* Debug Paranoid... */ -#define DBGP(s) do { } while(0) -/* Debug Extra Paranoid... */ -#define DBGPX(s) do { } while(0) - -static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev); -static void __iomem *remap_pci_mem(ulong base, ulong size); -static int cpqarray_eisa_detect(void); -static int pollcomplete(int ctlr); -static void getgeometry(int ctlr); -static void start_fwbk(int ctlr); - -static cmdlist_t * cmd_alloc(ctlr_info_t *h, int get_from_pool); -static void cmd_free(ctlr_info_t *h, cmdlist_t *c, int got_from_pool); - -static void free_hba(int i); -static int alloc_cpqarray_hba(void); - -static int sendcmd( - __u8 cmd, - int ctlr, - void *buff, - size_t size, - unsigned int blk, - unsigned int blkcnt, - unsigned int log_unit ); - -static int ida_unlocked_open(struct block_device *bdev, fmode_t mode); -static void ida_release(struct gendisk *disk, fmode_t mode); -static int ida_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); -static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo); -static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io); - -static void do_ida_request(struct request_queue *q); -static void start_io(ctlr_info_t *h); - -static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_command(cmdlist_t *cmd, int timeout); - -static irqreturn_t do_ida_intr(int irq, void *dev_id); -static void ida_timer(unsigned long tdata); -static int ida_revalidate(struct gendisk *disk); -static int revalidate_allvol(ctlr_info_t *host); -static int cpqarray_register_ctlr(int ctlr, struct pci_dev *pdev); - -#ifdef CONFIG_PROC_FS -static void ida_procinit(int i); -#else -static void ida_procinit(int i) {} -#endif - -static inline drv_info_t *get_drv(struct gendisk *disk) -{ - return disk->private_data; -} - -static inline ctlr_info_t *get_host(struct gendisk *disk) -{ - return disk->queue->queuedata; -} - - -static const struct block_device_operations ida_fops = { - .owner = THIS_MODULE, - .open = ida_unlocked_open, - .release = ida_release, - .ioctl = ida_ioctl, - .getgeo = ida_getgeo, - .revalidate_disk= ida_revalidate, -}; - - -#ifdef CONFIG_PROC_FS - -static struct proc_dir_entry *proc_array; -static const struct file_operations ida_proc_fops; - -/* - * Get us a file in /proc/array that says something about each controller. - * Create /proc/array if it doesn't exist yet. - */ -static void __init ida_procinit(int i) -{ - if (proc_array == NULL) { - proc_array = proc_mkdir("driver/cpqarray", NULL); - if (!proc_array) return; - } - - proc_create_data(hba[i]->devname, 0, proc_array, &ida_proc_fops, hba[i]); -} - -/* - * Report information about this controller. - */ -static int ida_proc_show(struct seq_file *m, void *v) -{ - int i, ctlr; - ctlr_info_t *h = (ctlr_info_t*)m->private; - drv_info_t *drv; -#ifdef CPQ_PROC_PRINT_QUEUES - cmdlist_t *c; - unsigned long flags; -#endif - - ctlr = h->ctlr; - seq_printf(m, "%s: Compaq %s Controller\n" - " Board ID: 0x%08lx\n" - " Firmware Revision: %c%c%c%c\n" - " Controller Sig: 0x%08lx\n" - " Memory Address: 0x%08lx\n" - " I/O Port: 0x%04x\n" - " IRQ: %d\n" - " Logical drives: %d\n" - " Physical drives: %d\n\n" - " Current Q depth: %d\n" - " Max Q depth since init: %d\n\n", - h->devname, - h->product_name, - (unsigned long)h->board_id, - h->firm_rev[0], h->firm_rev[1], h->firm_rev[2], h->firm_rev[3], - (unsigned long)h->ctlr_sig, (unsigned long)h->vaddr, - (unsigned int) h->io_mem_addr, (unsigned int)h->intr, - h->log_drives, h->phys_drives, - h->Qdepth, h->maxQsinceinit); - - seq_puts(m, "Logical Drive Info:\n"); - - for(i=0; i<h->log_drives; i++) { - drv = &h->drv[i]; - seq_printf(m, "ida/c%dd%d: blksz=%d nr_blks=%d\n", - ctlr, i, drv->blk_size, drv->nr_blks); - } - -#ifdef CPQ_PROC_PRINT_QUEUES - spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); - seq_puts(m, "\nCurrent Queues:\n"); - - c = h->reqQ; - seq_printf(m, "reqQ = %p", c); - if (c) c=c->next; - while(c && c != h->reqQ) { - seq_printf(m, "->%p", c); - c=c->next; - } - - c = h->cmpQ; - seq_printf(m, "\ncmpQ = %p", c); - if (c) c=c->next; - while(c && c != h->cmpQ) { - seq_printf(m, "->%p", c); - c=c->next; - } - - seq_putc(m, '\n'); - spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); -#endif - seq_printf(m, "nr_allocs = %d\nnr_frees = %d\n", - h->nr_allocs, h->nr_frees); - return 0; -} - -static int ida_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ida_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations ida_proc_fops = { - .owner = THIS_MODULE, - .open = ida_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* CONFIG_PROC_FS */ - -module_param_array(eisa, int, NULL, 0); - -static void release_io_mem(ctlr_info_t *c) -{ - /* if IO mem was not protected do nothing */ - if( c->io_mem_addr == 0) - return; - release_region(c->io_mem_addr, c->io_mem_length); - c->io_mem_addr = 0; - c->io_mem_length = 0; -} - -static void cpqarray_remove_one(int i) -{ - int j; - char buff[4]; - - /* sendcmd will turn off interrupt, and send the flush... - * To write all data in the battery backed cache to disks - * no data returned, but don't want to send NULL to sendcmd */ - if( sendcmd(FLUSH_CACHE, i, buff, 4, 0, 0, 0)) - { - printk(KERN_WARNING "Unable to flush cache on controller %d\n", - i); - } - free_irq(hba[i]->intr, hba[i]); - iounmap(hba[i]->vaddr); - unregister_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname); - del_timer(&hba[i]->timer); - remove_proc_entry(hba[i]->devname, proc_array); - pci_free_consistent(hba[i]->pci_dev, - NR_CMDS * sizeof(cmdlist_t), (hba[i]->cmd_pool), - hba[i]->cmd_pool_dhandle); - kfree(hba[i]->cmd_pool_bits); - for(j = 0; j < NWD; j++) { - if (ida_gendisk[i][j]->flags & GENHD_FL_UP) - del_gendisk(ida_gendisk[i][j]); - put_disk(ida_gendisk[i][j]); - } - blk_cleanup_queue(hba[i]->queue); - release_io_mem(hba[i]); - free_hba(i); -} - -static void cpqarray_remove_one_pci(struct pci_dev *pdev) -{ - int i; - ctlr_info_t *tmp_ptr; - - if (pci_get_drvdata(pdev) == NULL) { - printk( KERN_ERR "cpqarray: Unable to remove device \n"); - return; - } - - tmp_ptr = pci_get_drvdata(pdev); - i = tmp_ptr->ctlr; - if (hba[i] == NULL) { - printk(KERN_ERR "cpqarray: controller %d appears to have" - "already been removed \n", i); - return; - } - pci_set_drvdata(pdev, NULL); - - cpqarray_remove_one(i); -} - -/* removing an instance that was not removed automatically.. - * must be an eisa card. - */ -static void cpqarray_remove_one_eisa(int i) -{ - if (hba[i] == NULL) { - printk(KERN_ERR "cpqarray: controller %d appears to have" - "already been removed \n", i); - return; - } - cpqarray_remove_one(i); -} - -/* pdev is NULL for eisa */ -static int cpqarray_register_ctlr(int i, struct pci_dev *pdev) -{ - struct request_queue *q; - int j; - - /* - * register block devices - * Find disks and fill in structs - * Get an interrupt, set the Q depth and get into /proc - */ - - /* If this successful it should insure that we are the only */ - /* instance of the driver */ - if (register_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname)) { - goto Enomem4; - } - hba[i]->access.set_intr_mask(hba[i], 0); - if (request_irq(hba[i]->intr, do_ida_intr, IRQF_SHARED, - hba[i]->devname, hba[i])) - { - printk(KERN_ERR "cpqarray: Unable to get irq %d for %s\n", - hba[i]->intr, hba[i]->devname); - goto Enomem3; - } - - for (j=0; j<NWD; j++) { - ida_gendisk[i][j] = alloc_disk(1 << NWD_SHIFT); - if (!ida_gendisk[i][j]) - goto Enomem2; - } - - hba[i]->cmd_pool = pci_alloc_consistent( - hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), - &(hba[i]->cmd_pool_dhandle)); - hba[i]->cmd_pool_bits = kcalloc( - DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long), - GFP_KERNEL); - - if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool) - goto Enomem1; - - memset(hba[i]->cmd_pool, 0, NR_CMDS * sizeof(cmdlist_t)); - printk(KERN_INFO "cpqarray: Finding drives on %s", - hba[i]->devname); - - spin_lock_init(&hba[i]->lock); - q = blk_init_queue(do_ida_request, &hba[i]->lock); - if (!q) - goto Enomem1; - - hba[i]->queue = q; - q->queuedata = hba[i]; - - getgeometry(i); - start_fwbk(i); - - ida_procinit(i); - - if (pdev) - blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); - - /* This is a hardware imposed limit. */ - blk_queue_max_segments(q, SG_MAX); - - init_timer(&hba[i]->timer); - hba[i]->timer.expires = jiffies + IDA_TIMER; - hba[i]->timer.data = (unsigned long)hba[i]; - hba[i]->timer.function = ida_timer; - add_timer(&hba[i]->timer); - - /* Enable IRQ now that spinlock and rate limit timer are set up */ - hba[i]->access.set_intr_mask(hba[i], FIFO_NOT_EMPTY); - - for(j=0; j<NWD; j++) { - struct gendisk *disk = ida_gendisk[i][j]; - drv_info_t *drv = &hba[i]->drv[j]; - sprintf(disk->disk_name, "ida/c%dd%d", i, j); - disk->major = COMPAQ_SMART2_MAJOR + i; - disk->first_minor = j<<NWD_SHIFT; - disk->fops = &ida_fops; - if (j && !drv->nr_blks) - continue; - blk_queue_logical_block_size(hba[i]->queue, drv->blk_size); - set_capacity(disk, drv->nr_blks); - disk->queue = hba[i]->queue; - disk->private_data = drv; - add_disk(disk); - } - - /* done ! */ - return(i); - -Enomem1: - nr_ctlr = i; - kfree(hba[i]->cmd_pool_bits); - if (hba[i]->cmd_pool) - pci_free_consistent(hba[i]->pci_dev, NR_CMDS*sizeof(cmdlist_t), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); -Enomem2: - while (j--) { - put_disk(ida_gendisk[i][j]); - ida_gendisk[i][j] = NULL; - } - free_irq(hba[i]->intr, hba[i]); -Enomem3: - unregister_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname); -Enomem4: - if (pdev) - pci_set_drvdata(pdev, NULL); - release_io_mem(hba[i]); - free_hba(i); - - printk( KERN_ERR "cpqarray: out of memory"); - - return -1; -} - -static int cpqarray_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - int i; - - printk(KERN_DEBUG "cpqarray: Device 0x%x has been found at" - " bus %d dev %d func %d\n", - pdev->device, pdev->bus->number, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - i = alloc_cpqarray_hba(); - if( i < 0 ) - return (-1); - memset(hba[i], 0, sizeof(ctlr_info_t)); - sprintf(hba[i]->devname, "ida%d", i); - hba[i]->ctlr = i; - /* Initialize the pdev driver private data */ - pci_set_drvdata(pdev, hba[i]); - - if (cpqarray_pci_init(hba[i], pdev) != 0) { - pci_set_drvdata(pdev, NULL); - release_io_mem(hba[i]); - free_hba(i); - return -1; - } - - return (cpqarray_register_ctlr(i, pdev)); -} - -static struct pci_driver cpqarray_pci_driver = { - .name = "cpqarray", - .probe = cpqarray_init_one, - .remove = cpqarray_remove_one_pci, - .id_table = cpqarray_pci_device_id, -}; - -/* - * This is it. Find all the controllers and register them. - * returns the number of block devices registered. - */ -static int __init cpqarray_init(void) -{ - int num_cntlrs_reg = 0; - int i; - int rc = 0; - - /* detect controllers */ - printk(DRIVER_NAME "\n"); - - rc = pci_register_driver(&cpqarray_pci_driver); - if (rc) - return rc; - cpqarray_eisa_detect(); - - for (i=0; i < MAX_CTLR; i++) { - if (hba[i] != NULL) - num_cntlrs_reg++; - } - - if (num_cntlrs_reg) - return 0; - else { - pci_unregister_driver(&cpqarray_pci_driver); - return -ENODEV; - } -} - -/* Function to find the first free pointer into our hba[] array */ -/* Returns -1 if no free entries are left. */ -static int alloc_cpqarray_hba(void) -{ - int i; - - for(i=0; i< MAX_CTLR; i++) { - if (hba[i] == NULL) { - hba[i] = kmalloc(sizeof(ctlr_info_t), GFP_KERNEL); - if(hba[i]==NULL) { - printk(KERN_ERR "cpqarray: out of memory.\n"); - return (-1); - } - return (i); - } - } - printk(KERN_WARNING "cpqarray: This driver supports a maximum" - " of 8 controllers.\n"); - return(-1); -} - -static void free_hba(int i) -{ - kfree(hba[i]); - hba[i]=NULL; -} - -/* - * Find the IO address of the controller, its IRQ and so forth. Fill - * in some basic stuff into the ctlr_info_t structure. - */ -static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev) -{ - ushort vendor_id, device_id, command; - unchar cache_line_size, latency_timer; - unchar irq, revision; - unsigned long addr[6]; - __u32 board_id; - - int i; - - c->pci_dev = pdev; - pci_set_master(pdev); - if (pci_enable_device(pdev)) { - printk(KERN_ERR "cpqarray: Unable to Enable PCI device\n"); - return -1; - } - vendor_id = pdev->vendor; - device_id = pdev->device; - revision = pdev->revision; - irq = pdev->irq; - - for(i=0; i<6; i++) - addr[i] = pci_resource_start(pdev, i); - - if (pci_set_dma_mask(pdev, CPQARRAY_DMA_MASK) != 0) - { - printk(KERN_ERR "cpqarray: Unable to set DMA mask\n"); - return -1; - } - - pci_read_config_word(pdev, PCI_COMMAND, &command); - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size); - pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer); - - pci_read_config_dword(pdev, 0x2c, &board_id); - - /* check to see if controller has been disabled */ - if(!(command & 0x02)) { - printk(KERN_WARNING - "cpqarray: controller appears to be disabled\n"); - return(-1); - } - -DBGINFO( - printk("vendor_id = %x\n", vendor_id); - printk("device_id = %x\n", device_id); - printk("command = %x\n", command); - for(i=0; i<6; i++) - printk("addr[%d] = %lx\n", i, addr[i]); - printk("revision = %x\n", revision); - printk("irq = %x\n", irq); - printk("cache_line_size = %x\n", cache_line_size); - printk("latency_timer = %x\n", latency_timer); - printk("board_id = %x\n", board_id); -); - - c->intr = irq; - - for(i=0; i<6; i++) { - if (pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO) - { /* IO space */ - c->io_mem_addr = addr[i]; - c->io_mem_length = pci_resource_end(pdev, i) - - pci_resource_start(pdev, i) + 1; - if(!request_region( c->io_mem_addr, c->io_mem_length, - "cpqarray")) - { - printk( KERN_WARNING "cpqarray I/O memory range already in use addr %lx length = %ld\n", c->io_mem_addr, c->io_mem_length); - c->io_mem_addr = 0; - c->io_mem_length = 0; - } - break; - } - } - - c->paddr = 0; - for(i=0; i<6; i++) - if (!(pci_resource_flags(pdev, i) & - PCI_BASE_ADDRESS_SPACE_IO)) { - c->paddr = pci_resource_start (pdev, i); - break; - } - if (!c->paddr) - return -1; - c->vaddr = remap_pci_mem(c->paddr, 128); - if (!c->vaddr) - return -1; - c->board_id = board_id; - - for(i=0; i<NR_PRODUCTS; i++) { - if (board_id == products[i].board_id) { - c->product_name = products[i].product_name; - c->access = *(products[i].access); - break; - } - } - if (i == NR_PRODUCTS) { - printk(KERN_WARNING "cpqarray: Sorry, I don't know how" - " to access the SMART Array controller %08lx\n", - (unsigned long)board_id); - return -1; - } - - return 0; -} - -/* - * Map (physical) PCI mem into (virtual) kernel space - */ -static void __iomem *remap_pci_mem(ulong base, ulong size) -{ - ulong page_base = ((ulong) base) & PAGE_MASK; - ulong page_offs = ((ulong) base) - page_base; - void __iomem *page_remapped = ioremap(page_base, page_offs+size); - - return (page_remapped ? (page_remapped + page_offs) : NULL); -} - -#ifndef MODULE -/* - * Config string is a comma separated set of i/o addresses of EISA cards. - */ -static int cpqarray_setup(char *str) -{ - int i, ints[9]; - - (void)get_options(str, ARRAY_SIZE(ints), ints); - - for(i=0; i<ints[0] && i<8; i++) - eisa[i] = ints[i+1]; - return 1; -} - -__setup("smart2=", cpqarray_setup); - -#endif - -/* - * Find an EISA controller's signature. Set up an hba if we find it. - */ -static int cpqarray_eisa_detect(void) -{ - int i=0, j; - __u32 board_id; - int intr; - int ctlr; - int num_ctlr = 0; - - while(i<8 && eisa[i]) { - ctlr = alloc_cpqarray_hba(); - if(ctlr == -1) - break; - board_id = inl(eisa[i]+0xC80); - for(j=0; j < NR_PRODUCTS; j++) - if (board_id == products[j].board_id) - break; - - if (j == NR_PRODUCTS) { - printk(KERN_WARNING "cpqarray: Sorry, I don't know how" - " to access the SMART Array controller %08lx\n", (unsigned long)board_id); - continue; - } - - memset(hba[ctlr], 0, sizeof(ctlr_info_t)); - hba[ctlr]->io_mem_addr = eisa[i]; - hba[ctlr]->io_mem_length = 0x7FF; - if(!request_region(hba[ctlr]->io_mem_addr, - hba[ctlr]->io_mem_length, - "cpqarray")) - { - printk(KERN_WARNING "cpqarray: I/O range already in " - "use addr = %lx length = %ld\n", - hba[ctlr]->io_mem_addr, - hba[ctlr]->io_mem_length); - free_hba(ctlr); - continue; - } - - /* - * Read the config register to find our interrupt - */ - intr = inb(eisa[i]+0xCC0) >> 4; - if (intr & 1) intr = 11; - else if (intr & 2) intr = 10; - else if (intr & 4) intr = 14; - else if (intr & 8) intr = 15; - - hba[ctlr]->intr = intr; - sprintf(hba[ctlr]->devname, "ida%d", nr_ctlr); - hba[ctlr]->product_name = products[j].product_name; - hba[ctlr]->access = *(products[j].access); - hba[ctlr]->ctlr = ctlr; - hba[ctlr]->board_id = board_id; - hba[ctlr]->pci_dev = NULL; /* not PCI */ - -DBGINFO( - printk("i = %d, j = %d\n", i, j); - printk("irq = %x\n", intr); - printk("product name = %s\n", products[j].product_name); - printk("board_id = %x\n", board_id); -); - - num_ctlr++; - i++; - - if (cpqarray_register_ctlr(ctlr, NULL) == -1) - printk(KERN_WARNING - "cpqarray: Can't register EISA controller %d\n", - ctlr); - - } - - return num_ctlr; -} - -/* - * Open. Make sure the device is really there. - */ -static int ida_open(struct block_device *bdev, fmode_t mode) -{ - drv_info_t *drv = get_drv(bdev->bd_disk); - ctlr_info_t *host = get_host(bdev->bd_disk); - - DBGINFO(printk("ida_open %s\n", bdev->bd_disk->disk_name)); - /* - * Root is allowed to open raw volume zero even if it's not configured - * so array config can still work. I don't think I really like this, - * but I'm already using way to many device nodes to claim another one - * for "raw controller". - */ - if (!drv->nr_blks) { - if (!capable(CAP_SYS_RAWIO)) - return -ENXIO; - if (!capable(CAP_SYS_ADMIN) && drv != host->drv) - return -ENXIO; - } - host->usage_count++; - return 0; -} - -static int ida_unlocked_open(struct block_device *bdev, fmode_t mode) -{ - int ret; - - mutex_lock(&cpqarray_mutex); - ret = ida_open(bdev, mode); - mutex_unlock(&cpqarray_mutex); - - return ret; -} - -/* - * Close. Sync first. - */ -static void ida_release(struct gendisk *disk, fmode_t mode) -{ - ctlr_info_t *host; - - mutex_lock(&cpqarray_mutex); - host = get_host(disk); - host->usage_count--; - mutex_unlock(&cpqarray_mutex); -} - -/* - * Enqueuing and dequeuing functions for cmdlists. - */ -static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c) -{ - if (*Qptr == NULL) { - *Qptr = c; - c->next = c->prev = c; - } else { - c->prev = (*Qptr)->prev; - c->next = (*Qptr); - (*Qptr)->prev->next = c; - (*Qptr)->prev = c; - } -} - -static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c) -{ - if (c && c->next != c) { - if (*Qptr == c) *Qptr = c->next; - c->prev->next = c->next; - c->next->prev = c->prev; - } else { - *Qptr = NULL; - } - return c; -} - -/* - * Get a request and submit it to the controller. - * This routine needs to grab all the requests it possibly can from the - * req Q and submit them. Interrupts are off (and need to be off) when you - * are in here (either via the dummy do_ida_request functions or by being - * called from the interrupt handler - */ -static void do_ida_request(struct request_queue *q) -{ - ctlr_info_t *h = q->queuedata; - cmdlist_t *c; - struct request *creq; - struct scatterlist tmp_sg[SG_MAX]; - int i, dir, seg; - -queue_next: - creq = blk_peek_request(q); - if (!creq) - goto startio; - - BUG_ON(creq->nr_phys_segments > SG_MAX); - - if ((c = cmd_alloc(h,1)) == NULL) - goto startio; - - blk_start_request(creq); - - c->ctlr = h->ctlr; - c->hdr.unit = (drv_info_t *)(creq->rq_disk->private_data) - h->drv; - c->hdr.size = sizeof(rblk_t) >> 2; - c->size += sizeof(rblk_t); - - c->req.hdr.blk = blk_rq_pos(creq); - c->rq = creq; -DBGPX( - printk("sector=%d, nr_sectors=%u\n", - blk_rq_pos(creq), blk_rq_sectors(creq)); -); - sg_init_table(tmp_sg, SG_MAX); - seg = blk_rq_map_sg(q, creq, tmp_sg); - - /* Now do all the DMA Mappings */ - if (rq_data_dir(creq) == READ) - dir = PCI_DMA_FROMDEVICE; - else - dir = PCI_DMA_TODEVICE; - for( i=0; i < seg; i++) - { - c->req.sg[i].size = tmp_sg[i].length; - c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, - sg_page(&tmp_sg[i]), - tmp_sg[i].offset, - tmp_sg[i].length, dir); - } -DBGPX( printk("Submitting %u sectors in %d segments\n", blk_rq_sectors(creq), seg); ); - c->req.hdr.sg_cnt = seg; - c->req.hdr.blk_cnt = blk_rq_sectors(creq); - c->req.hdr.cmd = (rq_data_dir(creq) == READ) ? IDA_READ : IDA_WRITE; - c->type = CMD_RWREQ; - - /* Put the request on the tail of the request queue */ - addQ(&h->reqQ, c); - h->Qdepth++; - if (h->Qdepth > h->maxQsinceinit) - h->maxQsinceinit = h->Qdepth; - - goto queue_next; - -startio: - start_io(h); -} - -/* - * start_io submits everything on a controller's request queue - * and moves it to the completion queue. - * - * Interrupts had better be off if you're in here - */ -static void start_io(ctlr_info_t *h) -{ - cmdlist_t *c; - - while((c = h->reqQ) != NULL) { - /* Can't do anything if we're busy */ - if (h->access.fifo_full(h) == 0) - return; - - /* Get the first entry from the request Q */ - removeQ(&h->reqQ, c); - h->Qdepth--; - - /* Tell the controller to do our bidding */ - h->access.submit_command(h, c); - - /* Get onto the completion Q */ - addQ(&h->cmpQ, c); - } -} - -/* - * Mark all buffers that cmd was responsible for - */ -static inline void complete_command(cmdlist_t *cmd, int timeout) -{ - struct request *rq = cmd->rq; - int error = 0; - int i, ddir; - - if (cmd->req.hdr.rcode & RCODE_NONFATAL && - (hba[cmd->ctlr]->misc_tflags & MISC_NONFATAL_WARN) == 0) { - printk(KERN_NOTICE "Non Fatal error on ida/c%dd%d\n", - cmd->ctlr, cmd->hdr.unit); - hba[cmd->ctlr]->misc_tflags |= MISC_NONFATAL_WARN; - } - if (cmd->req.hdr.rcode & RCODE_FATAL) { - printk(KERN_WARNING "Fatal error on ida/c%dd%d\n", - cmd->ctlr, cmd->hdr.unit); - error = -EIO; - } - if (cmd->req.hdr.rcode & RCODE_INVREQ) { - printk(KERN_WARNING "Invalid request on ida/c%dd%d = (cmd=%x sect=%d cnt=%d sg=%d ret=%x)\n", - cmd->ctlr, cmd->hdr.unit, cmd->req.hdr.cmd, - cmd->req.hdr.blk, cmd->req.hdr.blk_cnt, - cmd->req.hdr.sg_cnt, cmd->req.hdr.rcode); - error = -EIO; - } - if (timeout) - error = -EIO; - /* unmap the DMA mapping for all the scatter gather elements */ - if (cmd->req.hdr.cmd == IDA_READ) - ddir = PCI_DMA_FROMDEVICE; - else - ddir = PCI_DMA_TODEVICE; - for(i=0; i<cmd->req.hdr.sg_cnt; i++) - pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, - cmd->req.sg[i].size, ddir); - - DBGPX(printk("Done with %p\n", rq);); - __blk_end_request_all(rq, error); -} - -/* - * The controller will interrupt us upon completion of commands. - * Find the command on the completion queue, remove it, tell the OS and - * try to queue up more IO - */ -static irqreturn_t do_ida_intr(int irq, void *dev_id) -{ - ctlr_info_t *h = dev_id; - cmdlist_t *c; - unsigned long istat; - unsigned long flags; - __u32 a,a1; - - istat = h->access.intr_pending(h); - /* Is this interrupt for us? */ - if (istat == 0) - return IRQ_NONE; - - /* - * If there are completed commands in the completion queue, - * we had better do something about it. - */ - spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); - if (istat & FIFO_NOT_EMPTY) { - while((a = h->access.command_completed(h))) { - a1 = a; a &= ~3; - if ((c = h->cmpQ) == NULL) - { - printk(KERN_WARNING "cpqarray: Completion of %08lx ignored\n", (unsigned long)a1); - continue; - } - while(c->busaddr != a) { - c = c->next; - if (c == h->cmpQ) - break; - } - /* - * If we've found the command, take it off the - * completion Q and free it - */ - if (c->busaddr == a) { - removeQ(&h->cmpQ, c); - /* Check for invalid command. - * Controller returns command error, - * But rcode = 0. - */ - - if((a1 & 0x03) && (c->req.hdr.rcode == 0)) - { - c->req.hdr.rcode = RCODE_INVREQ; - } - if (c->type == CMD_RWREQ) { - complete_command(c, 0); - cmd_free(h, c, 1); - } else if (c->type == CMD_IOCTL_PEND) { - c->type = CMD_IOCTL_DONE; - } - continue; - } - } - } - - /* - * See if we can queue up some more IO - */ - do_ida_request(h->queue); - spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); - return IRQ_HANDLED; -} - -/* - * This timer was for timing out requests that haven't happened after - * IDA_TIMEOUT. That wasn't such a good idea. This timer is used to - * reset a flags structure so we don't flood the user with - * "Non-Fatal error" messages. - */ -static void ida_timer(unsigned long tdata) -{ - ctlr_info_t *h = (ctlr_info_t*)tdata; - - h->timer.expires = jiffies + IDA_TIMER; - add_timer(&h->timer); - h->misc_tflags = 0; -} - -static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - drv_info_t *drv = get_drv(bdev->bd_disk); - - if (drv->cylinders) { - geo->heads = drv->heads; - geo->sectors = drv->sectors; - geo->cylinders = drv->cylinders; - } else { - geo->heads = 0xff; - geo->sectors = 0x3f; - geo->cylinders = drv->nr_blks / (0xff*0x3f); - } - - return 0; -} - -/* - * ida_ioctl does some miscellaneous stuff like reporting drive geometry, - * setting readahead and submitting commands from userspace to the controller. - */ -static int ida_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) -{ - drv_info_t *drv = get_drv(bdev->bd_disk); - ctlr_info_t *host = get_host(bdev->bd_disk); - int error; - ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg; - ida_ioctl_t *my_io; - - switch(cmd) { - case IDAGETDRVINFO: - if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t))) - return -EFAULT; - return 0; - case IDAPASSTHRU: - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - my_io = kmalloc(sizeof(ida_ioctl_t), GFP_KERNEL); - if (!my_io) - return -ENOMEM; - error = -EFAULT; - if (copy_from_user(my_io, io, sizeof(*my_io))) - goto out_passthru; - error = ida_ctlr_ioctl(host, drv - host->drv, my_io); - if (error) - goto out_passthru; - error = -EFAULT; - if (copy_to_user(io, my_io, sizeof(*my_io))) - goto out_passthru; - error = 0; -out_passthru: - kfree(my_io); - return error; - case IDAGETCTLRSIG: - if (!arg) return -EINVAL; - if (put_user(host->ctlr_sig, (int __user *)arg)) - return -EFAULT; - return 0; - case IDAREVALIDATEVOLS: - if (MINOR(bdev->bd_dev) != 0) - return -ENXIO; - return revalidate_allvol(host); - case IDADRIVERVERSION: - if (!arg) return -EINVAL; - if (put_user(DRIVER_VERSION, (unsigned long __user *)arg)) - return -EFAULT; - return 0; - case IDAGETPCIINFO: - { - - ida_pci_info_struct pciinfo; - - if (!arg) return -EINVAL; - memset(&pciinfo, 0, sizeof(pciinfo)); - pciinfo.bus = host->pci_dev->bus->number; - pciinfo.dev_fn = host->pci_dev->devfn; - pciinfo.board_id = host->board_id; - if(copy_to_user((void __user *) arg, &pciinfo, - sizeof( ida_pci_info_struct))) - return -EFAULT; - return(0); - } - - default: - return -EINVAL; - } - -} - -static int ida_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - mutex_lock(&cpqarray_mutex); - ret = ida_locked_ioctl(bdev, mode, cmd, param); - mutex_unlock(&cpqarray_mutex); - - return ret; -} - -/* - * ida_ctlr_ioctl is for passing commands to the controller from userspace. - * The command block (io) has already been copied to kernel space for us, - * however, any elements in the sglist need to be copied to kernel space - * or copied back to userspace. - * - * Only root may perform a controller passthru command, however I'm not doing - * any serious sanity checking on the arguments. Doing an IDA_WRITE_MEDIA and - * putting a 64M buffer in the sglist is probably a *bad* idea. - */ -static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io) -{ - int ctlr = h->ctlr; - cmdlist_t *c; - void *p = NULL; - unsigned long flags; - int error; - - if ((c = cmd_alloc(h, 0)) == NULL) - return -ENOMEM; - c->ctlr = ctlr; - c->hdr.unit = (io->unit & UNITVALID) ? (io->unit & ~UNITVALID) : dsk; - c->hdr.size = sizeof(rblk_t) >> 2; - c->size += sizeof(rblk_t); - - c->req.hdr.cmd = io->cmd; - c->req.hdr.blk = io->blk; - c->req.hdr.blk_cnt = io->blk_cnt; - c->type = CMD_IOCTL_PEND; - - /* Pre submit processing */ - switch(io->cmd) { - case PASSTHRU_A: - p = memdup_user(io->sg[0].addr, io->sg[0].size); - if (IS_ERR(p)) { - error = PTR_ERR(p); - cmd_free(h, c, 0); - return error; - } - c->req.hdr.blk = pci_map_single(h->pci_dev, &(io->c), - sizeof(ida_ioctl_t), - PCI_DMA_BIDIRECTIONAL); - c->req.sg[0].size = io->sg[0].size; - c->req.sg[0].addr = pci_map_single(h->pci_dev, p, - c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); - c->req.hdr.sg_cnt = 1; - break; - case IDA_READ: - case READ_FLASH_ROM: - case SENSE_CONTROLLER_PERFORMANCE: - p = kmalloc(io->sg[0].size, GFP_KERNEL); - if (!p) - { - error = -ENOMEM; - cmd_free(h, c, 0); - return(error); - } - - c->req.sg[0].size = io->sg[0].size; - c->req.sg[0].addr = pci_map_single(h->pci_dev, p, - c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); - c->req.hdr.sg_cnt = 1; - break; - case IDA_WRITE: - case IDA_WRITE_MEDIA: - case DIAG_PASS_THRU: - case COLLECT_BUFFER: - case WRITE_FLASH_ROM: - p = memdup_user(io->sg[0].addr, io->sg[0].size); - if (IS_ERR(p)) { - error = PTR_ERR(p); - cmd_free(h, c, 0); - return error; - } - c->req.sg[0].size = io->sg[0].size; - c->req.sg[0].addr = pci_map_single(h->pci_dev, p, - c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); - c->req.hdr.sg_cnt = 1; - break; - default: - c->req.sg[0].size = sizeof(io->c); - c->req.sg[0].addr = pci_map_single(h->pci_dev,&io->c, - c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); - c->req.hdr.sg_cnt = 1; - } - - /* Put the request on the tail of the request queue */ - spin_lock_irqsave(IDA_LOCK(ctlr), flags); - addQ(&h->reqQ, c); - h->Qdepth++; - start_io(h); - spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - - /* Wait for completion */ - while(c->type != CMD_IOCTL_DONE) - schedule(); - - /* Unmap the DMA */ - pci_unmap_single(h->pci_dev, c->req.sg[0].addr, c->req.sg[0].size, - PCI_DMA_BIDIRECTIONAL); - /* Post submit processing */ - switch(io->cmd) { - case PASSTHRU_A: - pci_unmap_single(h->pci_dev, c->req.hdr.blk, - sizeof(ida_ioctl_t), - PCI_DMA_BIDIRECTIONAL); - case IDA_READ: - case DIAG_PASS_THRU: - case SENSE_CONTROLLER_PERFORMANCE: - case READ_FLASH_ROM: - if (copy_to_user(io->sg[0].addr, p, io->sg[0].size)) { - kfree(p); - return -EFAULT; - } - /* fall through and free p */ - case IDA_WRITE: - case IDA_WRITE_MEDIA: - case COLLECT_BUFFER: - case WRITE_FLASH_ROM: - kfree(p); - break; - default:; - /* Nothing to do */ - } - - io->rcode = c->req.hdr.rcode; - cmd_free(h, c, 0); - return(0); -} - -/* - * Commands are pre-allocated in a large block. Here we use a simple bitmap - * scheme to suballocte them to the driver. Operations that are not time - * critical (and can wait for kmalloc and possibly sleep) can pass in NULL - * as the first argument to get a new command. - */ -static cmdlist_t * cmd_alloc(ctlr_info_t *h, int get_from_pool) -{ - cmdlist_t * c; - int i; - dma_addr_t cmd_dhandle; - - if (!get_from_pool) { - c = (cmdlist_t*)pci_alloc_consistent(h->pci_dev, - sizeof(cmdlist_t), &cmd_dhandle); - if(c==NULL) - return NULL; - } else { - do { - i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); - if (i == NR_CMDS) - return NULL; - } while(test_and_set_bit(i&(BITS_PER_LONG-1), h->cmd_pool_bits+(i/BITS_PER_LONG)) != 0); - c = h->cmd_pool + i; - cmd_dhandle = h->cmd_pool_dhandle + i*sizeof(cmdlist_t); - h->nr_allocs++; - } - - memset(c, 0, sizeof(cmdlist_t)); - c->busaddr = cmd_dhandle; - return c; -} - -static void cmd_free(ctlr_info_t *h, cmdlist_t *c, int got_from_pool) -{ - int i; - - if (!got_from_pool) { - pci_free_consistent(h->pci_dev, sizeof(cmdlist_t), c, - c->busaddr); - } else { - i = c - h->cmd_pool; - clear_bit(i&(BITS_PER_LONG-1), h->cmd_pool_bits+(i/BITS_PER_LONG)); - h->nr_frees++; - } -} - -/*********************************************************************** - name: sendcmd - Send a command to an IDA using the memory mapped FIFO interface - and wait for it to complete. - This routine should only be called at init time. -***********************************************************************/ -static int sendcmd( - __u8 cmd, - int ctlr, - void *buff, - size_t size, - unsigned int blk, - unsigned int blkcnt, - unsigned int log_unit ) -{ - cmdlist_t *c; - int complete; - unsigned long temp; - unsigned long i; - ctlr_info_t *info_p = hba[ctlr]; - - c = cmd_alloc(info_p, 1); - if(!c) - return IO_ERROR; - c->ctlr = ctlr; - c->hdr.unit = log_unit; - c->hdr.prio = 0; - c->hdr.size = sizeof(rblk_t) >> 2; - c->size += sizeof(rblk_t); - - /* The request information. */ - c->req.hdr.next = 0; - c->req.hdr.rcode = 0; - c->req.bp = 0; - c->req.hdr.sg_cnt = 1; - c->req.hdr.reserved = 0; - - if (size == 0) - c->req.sg[0].size = 512; - else - c->req.sg[0].size = size; - - c->req.hdr.blk = blk; - c->req.hdr.blk_cnt = blkcnt; - c->req.hdr.cmd = (unsigned char) cmd; - c->req.sg[0].addr = (__u32) pci_map_single(info_p->pci_dev, - buff, c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); - /* - * Disable interrupt - */ - info_p->access.set_intr_mask(info_p, 0); - /* Make sure there is room in the command FIFO */ - /* Actually it should be completely empty at this time. */ - for (i = 200000; i > 0; i--) { - temp = info_p->access.fifo_full(info_p); - if (temp != 0) { - break; - } - udelay(10); -DBG( - printk(KERN_WARNING "cpqarray ida%d: idaSendPciCmd FIFO full," - " waiting!\n", ctlr); -); - } - /* - * Send the cmd - */ - info_p->access.submit_command(info_p, c); - complete = pollcomplete(ctlr); - - pci_unmap_single(info_p->pci_dev, (dma_addr_t) c->req.sg[0].addr, - c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); - if (complete != 1) { - if (complete != c->busaddr) { - printk( KERN_WARNING - "cpqarray ida%d: idaSendPciCmd " - "Invalid command list address returned! (%08lx)\n", - ctlr, (unsigned long)complete); - cmd_free(info_p, c, 1); - return (IO_ERROR); - } - } else { - printk( KERN_WARNING - "cpqarray ida%d: idaSendPciCmd Timeout out, " - "No command list address returned!\n", - ctlr); - cmd_free(info_p, c, 1); - return (IO_ERROR); - } - - if (c->req.hdr.rcode & 0x00FE) { - if (!(c->req.hdr.rcode & BIG_PROBLEM)) { - printk( KERN_WARNING - "cpqarray ida%d: idaSendPciCmd, error: " - "Controller failed at init time " - "cmd: 0x%x, return code = 0x%x\n", - ctlr, c->req.hdr.cmd, c->req.hdr.rcode); - - cmd_free(info_p, c, 1); - return (IO_ERROR); - } - } - cmd_free(info_p, c, 1); - return (IO_OK); -} - -/* - * revalidate_allvol is for online array config utilities. After a - * utility reconfigures the drives in the array, it can use this function - * (through an ioctl) to make the driver zap any previous disk structs for - * that controller and get new ones. - * - * Right now I'm using the getgeometry() function to do this, but this - * function should probably be finer grained and allow you to revalidate one - * particualar logical volume (instead of all of them on a particular - * controller). - */ -static int revalidate_allvol(ctlr_info_t *host) -{ - int ctlr = host->ctlr; - int i; - unsigned long flags; - - spin_lock_irqsave(IDA_LOCK(ctlr), flags); - if (host->usage_count > 1) { - spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - printk(KERN_WARNING "cpqarray: Device busy for volume" - " revalidation (usage=%d)\n", host->usage_count); - return -EBUSY; - } - host->usage_count++; - spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - - /* - * Set the partition and block size structures for all volumes - * on this controller to zero. We will reread all of this data - */ - set_capacity(ida_gendisk[ctlr][0], 0); - for (i = 1; i < NWD; i++) { - struct gendisk *disk = ida_gendisk[ctlr][i]; - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - } - memset(host->drv, 0, sizeof(drv_info_t)*NWD); - - /* - * Tell the array controller not to give us any interrupts while - * we check the new geometry. Then turn interrupts back on when - * we're done. - */ - host->access.set_intr_mask(host, 0); - getgeometry(ctlr); - host->access.set_intr_mask(host, FIFO_NOT_EMPTY); - - for(i=0; i<NWD; i++) { - struct gendisk *disk = ida_gendisk[ctlr][i]; - drv_info_t *drv = &host->drv[i]; - if (i && !drv->nr_blks) - continue; - blk_queue_logical_block_size(host->queue, drv->blk_size); - set_capacity(disk, drv->nr_blks); - disk->queue = host->queue; - disk->private_data = drv; - if (i) - add_disk(disk); - } - - host->usage_count--; - return 0; -} - -static int ida_revalidate(struct gendisk *disk) -{ - drv_info_t *drv = disk->private_data; - set_capacity(disk, drv->nr_blks); - return 0; -} - -/******************************************************************** - name: pollcomplete - Wait polling for a command to complete. - The memory mapped FIFO is polled for the completion. - Used only at init time, interrupts disabled. - ********************************************************************/ -static int pollcomplete(int ctlr) -{ - int done; - int i; - - /* Wait (up to 2 seconds) for a command to complete */ - - for (i = 200000; i > 0; i--) { - done = hba[ctlr]->access.command_completed(hba[ctlr]); - if (done == 0) { - udelay(10); /* a short fixed delay */ - } else - return (done); - } - /* Invalid address to tell caller we ran out of time */ - return 1; -} -/***************************************************************** - start_fwbk - Starts controller firmwares background processing. - Currently only the Integrated Raid controller needs this done. - If the PCI mem address registers are written to after this, - data corruption may occur -*****************************************************************/ -static void start_fwbk(int ctlr) -{ - id_ctlr_t *id_ctlr_buf; - int ret_code; - - if( (hba[ctlr]->board_id != 0x40400E11) - && (hba[ctlr]->board_id != 0x40480E11) ) - - /* Not a Integrated Raid, so there is nothing for us to do */ - return; - printk(KERN_DEBUG "cpqarray: Starting firmware's background" - " processing\n"); - /* Command does not return anything, but idasend command needs a - buffer */ - id_ctlr_buf = kmalloc(sizeof(id_ctlr_t), GFP_KERNEL); - if(id_ctlr_buf==NULL) - { - printk(KERN_WARNING "cpqarray: Out of memory. " - "Unable to start background processing.\n"); - return; - } - ret_code = sendcmd(RESUME_BACKGROUND_ACTIVITY, ctlr, - id_ctlr_buf, 0, 0, 0, 0); - if(ret_code != IO_OK) - printk(KERN_WARNING "cpqarray: Unable to start" - " background processing\n"); - - kfree(id_ctlr_buf); -} -/***************************************************************** - getgeometry - Get ida logical volume geometry from the controller - This is a large bit of code which once existed in two flavors, - It is used only at init time. -*****************************************************************/ -static void getgeometry(int ctlr) -{ - id_log_drv_t *id_ldrive; - id_ctlr_t *id_ctlr_buf; - sense_log_drv_stat_t *id_lstatus_buf; - config_t *sense_config_buf; - unsigned int log_unit, log_index; - int ret_code, size; - drv_info_t *drv; - ctlr_info_t *info_p = hba[ctlr]; - int i; - - info_p->log_drv_map = 0; - - id_ldrive = kzalloc(sizeof(id_log_drv_t), GFP_KERNEL); - if (!id_ldrive) { - printk( KERN_ERR "cpqarray: out of memory.\n"); - goto err_0; - } - - id_ctlr_buf = kzalloc(sizeof(id_ctlr_t), GFP_KERNEL); - if (!id_ctlr_buf) { - printk( KERN_ERR "cpqarray: out of memory.\n"); - goto err_1; - } - - id_lstatus_buf = kzalloc(sizeof(sense_log_drv_stat_t), GFP_KERNEL); - if (!id_lstatus_buf) { - printk( KERN_ERR "cpqarray: out of memory.\n"); - goto err_2; - } - - sense_config_buf = kzalloc(sizeof(config_t), GFP_KERNEL); - if (!sense_config_buf) { - printk( KERN_ERR "cpqarray: out of memory.\n"); - goto err_3; - } - - info_p->phys_drives = 0; - info_p->log_drv_map = 0; - info_p->drv_assign_map = 0; - info_p->drv_spare_map = 0; - info_p->mp_failed_drv_map = 0; /* only initialized here */ - /* Get controllers info for this logical drive */ - ret_code = sendcmd(ID_CTLR, ctlr, id_ctlr_buf, 0, 0, 0, 0); - if (ret_code == IO_ERROR) { - /* - * If can't get controller info, set the logical drive map to 0, - * so the idastubopen will fail on all logical drives - * on the controller. - */ - printk(KERN_ERR "cpqarray: error sending ID controller\n"); - goto err_4; - } - - info_p->log_drives = id_ctlr_buf->nr_drvs; - for(i=0;i<4;i++) - info_p->firm_rev[i] = id_ctlr_buf->firm_rev[i]; - info_p->ctlr_sig = id_ctlr_buf->cfg_sig; - - printk(" (%s)\n", info_p->product_name); - /* - * Initialize logical drive map to zero - */ - log_index = 0; - /* - * Get drive geometry for all logical drives - */ - if (id_ctlr_buf->nr_drvs > 16) - printk(KERN_WARNING "cpqarray ida%d: This driver supports " - "16 logical drives per controller.\n. " - " Additional drives will not be " - "detected\n", ctlr); - - for (log_unit = 0; - (log_index < id_ctlr_buf->nr_drvs) - && (log_unit < NWD); - log_unit++) { - size = sizeof(sense_log_drv_stat_t); - - /* - Send "Identify logical drive status" cmd - */ - ret_code = sendcmd(SENSE_LOG_DRV_STAT, - ctlr, id_lstatus_buf, size, 0, 0, log_unit); - if (ret_code == IO_ERROR) { - /* - If can't get logical drive status, set - the logical drive map to 0, so the - idastubopen will fail for all logical drives - on the controller. - */ - info_p->log_drv_map = 0; - printk( KERN_WARNING - "cpqarray ida%d: idaGetGeometry - Controller" - " failed to report status of logical drive %d\n" - "Access to this controller has been disabled\n", - ctlr, log_unit); - goto err_4; - } - /* - Make sure the logical drive is configured - */ - if (id_lstatus_buf->status != LOG_NOT_CONF) { - ret_code = sendcmd(ID_LOG_DRV, ctlr, id_ldrive, - sizeof(id_log_drv_t), 0, 0, log_unit); - /* - If error, the bit for this - logical drive won't be set and - idastubopen will return error. - */ - if (ret_code != IO_ERROR) { - drv = &info_p->drv[log_unit]; - drv->blk_size = id_ldrive->blk_size; - drv->nr_blks = id_ldrive->nr_blks; - drv->cylinders = id_ldrive->drv.cyl; - drv->heads = id_ldrive->drv.heads; - drv->sectors = id_ldrive->drv.sect_per_track; - info_p->log_drv_map |= (1 << log_unit); - - printk(KERN_INFO "cpqarray ida/c%dd%d: blksz=%d nr_blks=%d\n", - ctlr, log_unit, drv->blk_size, drv->nr_blks); - ret_code = sendcmd(SENSE_CONFIG, - ctlr, sense_config_buf, - sizeof(config_t), 0, 0, log_unit); - if (ret_code == IO_ERROR) { - info_p->log_drv_map = 0; - printk(KERN_ERR "cpqarray: error sending sense config\n"); - goto err_4; - } - - info_p->phys_drives = - sense_config_buf->ctlr_phys_drv; - info_p->drv_assign_map - |= sense_config_buf->drv_asgn_map; - info_p->drv_assign_map - |= sense_config_buf->spare_asgn_map; - info_p->drv_spare_map - |= sense_config_buf->spare_asgn_map; - } /* end of if no error on id_ldrive */ - log_index = log_index + 1; - } /* end of if logical drive configured */ - } /* end of for log_unit */ - - /* Free all the buffers and return */ -err_4: - kfree(sense_config_buf); -err_3: - kfree(id_lstatus_buf); -err_2: - kfree(id_ctlr_buf); -err_1: - kfree(id_ldrive); -err_0: - return; -} - -static void __exit cpqarray_exit(void) -{ - int i; - - pci_unregister_driver(&cpqarray_pci_driver); - - /* Double check that all controller entries have been removed */ - for(i=0; i<MAX_CTLR; i++) { - if (hba[i] != NULL) { - printk(KERN_WARNING "cpqarray: Removing EISA " - "controller %d\n", i); - cpqarray_remove_one_eisa(i); - } - } - - remove_proc_entry("driver/cpqarray", NULL); -} - -module_init(cpqarray_init) -module_exit(cpqarray_exit) diff --git a/drivers/block/cpqarray.h b/drivers/block/cpqarray.h deleted file mode 100644 index be73e9d579c5..000000000000 --- a/drivers/block/cpqarray.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Disk Array driver for Compaq SMART2 Controllers - * Copyright 1998 Compaq Computer Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * - * If you want to make changes, improve or add functionality to this - * driver, you'll probably need the Compaq Array Controller Interface - * Specificiation (Document number ECG086/1198) - */ -#ifndef CPQARRAY_H -#define CPQARRAY_H - -#ifdef __KERNEL__ -#include <linux/blkdev.h> -#include <linux/slab.h> -#include <linux/proc_fs.h> -#include <linux/timer.h> -#endif - -#include "ida_cmd.h" - -#define IO_OK 0 -#define IO_ERROR 1 -#define NWD 16 -#define NWD_SHIFT 4 - -#define IDA_TIMER (5*HZ) -#define IDA_TIMEOUT (10*HZ) - -#define MISC_NONFATAL_WARN 0x01 - -typedef struct { - unsigned blk_size; - unsigned nr_blks; - unsigned cylinders; - unsigned heads; - unsigned sectors; - int usage_count; -} drv_info_t; - -#ifdef __KERNEL__ - -struct ctlr_info; -typedef struct ctlr_info ctlr_info_t; - -struct access_method { - void (*submit_command)(ctlr_info_t *h, cmdlist_t *c); - void (*set_intr_mask)(ctlr_info_t *h, unsigned long val); - unsigned long (*fifo_full)(ctlr_info_t *h); - unsigned long (*intr_pending)(ctlr_info_t *h); - unsigned long (*command_completed)(ctlr_info_t *h); -}; - -struct board_type { - __u32 board_id; - char *product_name; - struct access_method *access; -}; - -struct ctlr_info { - int ctlr; - char devname[8]; - __u32 log_drv_map; - __u32 drv_assign_map; - __u32 drv_spare_map; - __u32 mp_failed_drv_map; - - char firm_rev[4]; - int ctlr_sig; - - int log_drives; - int phys_drives; - - struct pci_dev *pci_dev; /* NULL if EISA */ - __u32 board_id; - char *product_name; - - void __iomem *vaddr; - unsigned long paddr; - unsigned long io_mem_addr; - unsigned long io_mem_length; - int intr; - int usage_count; - drv_info_t drv[NWD]; - struct proc_dir_entry *proc; - - struct access_method access; - - cmdlist_t *reqQ; - cmdlist_t *cmpQ; - cmdlist_t *cmd_pool; - dma_addr_t cmd_pool_dhandle; - unsigned long *cmd_pool_bits; - struct request_queue *queue; - spinlock_t lock; - - unsigned int Qdepth; - unsigned int maxQsinceinit; - - unsigned int nr_requests; - unsigned int nr_allocs; - unsigned int nr_frees; - struct timer_list timer; - unsigned int misc_tflags; -}; - -#define IDA_LOCK(i) (&hba[i]->lock) - -#endif - -#endif /* CPQARRAY_H */ diff --git a/drivers/block/ida_cmd.h b/drivers/block/ida_cmd.h deleted file mode 100644 index 98b5746b3089..000000000000 --- a/drivers/block/ida_cmd.h +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Disk Array driver for Compaq SMART2 Controllers - * Copyright 1998 Compaq Computer Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * - */ -#ifndef ARRAYCMD_H -#define ARRAYCMD_H - -#include <asm/types.h> -#if 0 -#include <linux/blkdev.h> -#endif - -/* for the Smart Array 42XX cards */ -#define S42XX_REQUEST_PORT_OFFSET 0x40 -#define S42XX_REPLY_INTR_MASK_OFFSET 0x34 -#define S42XX_REPLY_PORT_OFFSET 0x44 -#define S42XX_INTR_STATUS 0x30 - -#define S42XX_INTR_OFF 0x08 -#define S42XX_INTR_PENDING 0x08 - -#define COMMAND_FIFO 0x04 -#define COMMAND_COMPLETE_FIFO 0x08 -#define INTR_MASK 0x0C -#define INTR_STATUS 0x10 -#define INTR_PENDING 0x14 - -#define FIFO_NOT_EMPTY 0x01 -#define FIFO_NOT_FULL 0x02 - -#define BIG_PROBLEM 0x40 -#define LOG_NOT_CONF 2 - -#pragma pack(1) -typedef struct { - __u32 size; - __u32 addr; -} sg_t; - -#define RCODE_NONFATAL 0x02 -#define RCODE_FATAL 0x04 -#define RCODE_INVREQ 0x10 -typedef struct { - __u16 next; - __u8 cmd; - __u8 rcode; - __u32 blk; - __u16 blk_cnt; - __u8 sg_cnt; - __u8 reserved; -} rhdr_t; - -#define SG_MAX 32 -typedef struct { - rhdr_t hdr; - sg_t sg[SG_MAX]; - __u32 bp; -} rblk_t; - -typedef struct { - __u8 unit; - __u8 prio; - __u16 size; -} chdr_t; - -#define CMD_RWREQ 0x00 -#define CMD_IOCTL_PEND 0x01 -#define CMD_IOCTL_DONE 0x02 - -typedef struct cmdlist { - chdr_t hdr; - rblk_t req; - __u32 size; - int retry_cnt; - __u32 busaddr; - int ctlr; - struct cmdlist *prev; - struct cmdlist *next; - struct request *rq; - int type; -} cmdlist_t; - -#define ID_CTLR 0x11 -typedef struct { - __u8 nr_drvs; - __u32 cfg_sig; - __u8 firm_rev[4]; - __u8 rom_rev[4]; - __u8 hw_rev; - __u32 bb_rev; - __u32 drv_present_map; - __u32 ext_drv_map; - __u32 board_id; - __u8 cfg_error; - __u32 non_disk_bits; - __u8 bad_ram_addr; - __u8 cpu_rev; - __u8 pdpi_rev; - __u8 epic_rev; - __u8 wcxc_rev; - __u8 marketing_rev; - __u8 ctlr_flags; - __u8 host_flags; - __u8 expand_dis; - __u8 scsi_chips; - __u32 max_req_blocks; - __u32 ctlr_clock; - __u8 drvs_per_bus; - __u16 big_drv_present_map[8]; - __u16 big_ext_drv_map[8]; - __u16 big_non_disk_map[8]; - __u16 task_flags; - __u8 icl_bus; - __u8 red_modes; - __u8 cur_red_mode; - __u8 red_ctlr_stat; - __u8 red_fail_reason; - __u8 reserved[403]; -} id_ctlr_t; - -typedef struct { - __u16 cyl; - __u8 heads; - __u8 xsig; - __u8 psectors; - __u16 wpre; - __u8 maxecc; - __u8 drv_ctrl; - __u16 pcyls; - __u8 pheads; - __u16 landz; - __u8 sect_per_track; - __u8 cksum; -} drv_param_t; - -#define ID_LOG_DRV 0x10 -typedef struct { - __u16 blk_size; - __u32 nr_blks; - drv_param_t drv; - __u8 fault_tol; - __u8 reserved; - __u8 bios_disable; -} id_log_drv_t; - -#define ID_LOG_DRV_EXT 0x18 -typedef struct { - __u32 log_drv_id; - __u8 log_drv_label[64]; - __u8 reserved[418]; -} id_log_drv_ext_t; - -#define SENSE_LOG_DRV_STAT 0x12 -typedef struct { - __u8 status; - __u32 fail_map; - __u16 read_err[32]; - __u16 write_err[32]; - __u8 drv_err_data[256]; - __u8 drq_timeout[32]; - __u32 blks_to_recover; - __u8 drv_recovering; - __u16 remap_cnt[32]; - __u32 replace_drv_map; - __u32 act_spare_map; - __u8 spare_stat; - __u8 spare_repl_map[32]; - __u32 repl_ok_map; - __u8 media_exch; - __u8 cache_fail; - __u8 expn_fail; - __u8 unit_flags; - __u16 big_fail_map[8]; - __u16 big_remap_map[128]; - __u16 big_repl_map[8]; - __u16 big_act_spare_map[8]; - __u8 big_spar_repl_map[128]; - __u16 big_repl_ok_map[8]; - __u8 big_drv_rebuild; - __u8 reserved[36]; -} sense_log_drv_stat_t; - -#define START_RECOVER 0x13 - -#define ID_PHYS_DRV 0x15 -typedef struct { - __u8 scsi_bus; - __u8 scsi_id; - __u16 blk_size; - __u32 nr_blks; - __u32 rsvd_blks; - __u8 drv_model[40]; - __u8 drv_sn[40]; - __u8 drv_fw[8]; - __u8 scsi_iq_bits; - __u8 compaq_drv_stmp; - __u8 last_fail; - __u8 phys_drv_flags; - __u8 phys_drv_flags1; - __u8 scsi_lun; - __u8 phys_drv_flags2; - __u8 reserved; - __u32 spi_speed_rules; - __u8 phys_connector[2]; - __u8 phys_box_on_bus; - __u8 phys_bay_in_box; -} id_phys_drv_t; - -#define BLINK_DRV_LEDS 0x16 -typedef struct { - __u32 blink_duration; - __u32 reserved; - __u8 blink[256]; - __u8 reserved1[248]; -} blink_drv_leds_t; - -#define SENSE_BLINK_LEDS 0x17 -typedef struct { - __u32 blink_duration; - __u32 btime_elap; - __u8 blink[256]; - __u8 reserved1[248]; -} sense_blink_leds_t; - -#define IDA_READ 0x20 -#define IDA_WRITE 0x30 -#define IDA_WRITE_MEDIA 0x31 -#define RESET_TO_DIAG 0x40 -#define DIAG_PASS_THRU 0x41 - -#define SENSE_CONFIG 0x50 -#define SET_CONFIG 0x51 -typedef struct { - __u32 cfg_sig; - __u16 compat_port; - __u8 data_dist_mode; - __u8 surf_an_ctrl; - __u16 ctlr_phys_drv; - __u16 log_unit_phys_drv; - __u16 fault_tol_mode; - __u8 phys_drv_param[16]; - drv_param_t drv; - __u32 drv_asgn_map; - __u16 dist_factor; - __u32 spare_asgn_map; - __u8 reserved[6]; - __u16 os; - __u8 ctlr_order; - __u8 extra_info; - __u32 data_offs; - __u8 parity_backedout_write_drvs; - __u8 parity_dist_mode; - __u8 parity_shift_fact; - __u8 bios_disable_flag; - __u32 blks_on_vol; - __u32 blks_per_drv; - __u8 scratch[16]; - __u16 big_drv_map[8]; - __u16 big_spare_map[8]; - __u8 ss_source_vol; - __u8 mix_drv_cap_range; - struct { - __u16 big_drv_map[8]; - __u32 blks_per_drv; - __u16 fault_tol_mode; - __u16 dist_factor; - } MDC_range[4]; - __u8 reserved1[248]; -} config_t; - -#define BYPASS_VOL_STATE 0x52 -#define SS_CREATE_VOL 0x53 -#define CHANGE_CONFIG 0x54 -#define SENSE_ORIG_CONF 0x55 -#define REORDER_LOG_DRV 0x56 -typedef struct { - __u8 old_units[32]; -} reorder_log_drv_t; - -#define LABEL_LOG_DRV 0x57 -typedef struct { - __u8 log_drv_label[64]; -} label_log_drv_t; - -#define SS_TO_VOL 0x58 - -#define SET_SURF_DELAY 0x60 -typedef struct { - __u16 delay; - __u8 reserved[510]; -} surf_delay_t; - -#define SET_OVERHEAT_DELAY 0x61 -typedef struct { - __u16 delay; -} overhead_delay_t; - -#define SET_MP_DELAY -typedef struct { - __u16 delay; - __u8 reserved[510]; -} mp_delay_t; - -#define PASSTHRU_A 0x91 -typedef struct { - __u8 target; - __u8 bus; - __u8 lun; - __u32 timeout; - __u32 flags; - __u8 status; - __u8 error; - __u8 cdb_len; - __u8 sense_error; - __u8 sense_key; - __u32 sense_info; - __u8 sense_code; - __u8 sense_qual; - __u32 residual; - __u8 reserved[4]; - __u8 cdb[12]; -} scsi_param_t; - -#define RESUME_BACKGROUND_ACTIVITY 0x99 -#define SENSE_CONTROLLER_PERFORMANCE 0xa8 -#define FLUSH_CACHE 0xc2 -#define COLLECT_BUFFER 0xd2 -#define READ_FLASH_ROM 0xf6 -#define WRITE_FLASH_ROM 0xf7 -#pragma pack() - -#endif /* ARRAYCMD_H */ diff --git a/drivers/block/ida_ioctl.h b/drivers/block/ida_ioctl.h deleted file mode 100644 index 888fff9caed0..000000000000 --- a/drivers/block/ida_ioctl.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Disk Array driver for Compaq SMART2 Controllers - * Copyright 1998 Compaq Computer Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * - */ -#ifndef IDA_IOCTL_H -#define IDA_IOCTL_H - -#include "ida_cmd.h" -#include "cpqarray.h" - -#define IDAGETDRVINFO 0x27272828 -#define IDAPASSTHRU 0x28282929 -#define IDAGETCTLRSIG 0x29293030 -#define IDAREVALIDATEVOLS 0x30303131 -#define IDADRIVERVERSION 0x31313232 -#define IDAGETPCIINFO 0x32323333 - -typedef struct _ida_pci_info_struct -{ - unsigned char bus; - unsigned char dev_fn; - __u32 board_id; -} ida_pci_info_struct; -/* - * Normally, the ioctl determines the logical unit for this command by - * the major,minor number of the fd passed to ioctl. If you need to send - * a command to a different/nonexistant unit (such as during config), you - * can override the normal behavior by setting the unit valid bit. (Normally, - * it should be zero) The controller the command is sent to is still - * determined by the major number of the open device. - */ - -#define UNITVALID 0x80 -typedef struct { - __u8 cmd; - __u8 rcode; - __u8 unit; - __u32 blk; - __u16 blk_cnt; - -/* currently, sg_cnt is assumed to be 1: only the 0th element of sg is used */ - struct { - void __user *addr; - size_t size; - } sg[SG_MAX]; - int sg_cnt; - - union ctlr_cmds { - drv_info_t drv; - unsigned char buf[1024]; - - id_ctlr_t id_ctlr; - drv_param_t drv_param; - id_log_drv_t id_log_drv; - id_log_drv_ext_t id_log_drv_ext; - sense_log_drv_stat_t sense_log_drv_stat; - id_phys_drv_t id_phys_drv; - blink_drv_leds_t blink_drv_leds; - sense_blink_leds_t sense_blink_leds; - config_t config; - reorder_log_drv_t reorder_log_drv; - label_log_drv_t label_log_drv; - surf_delay_t surf_delay; - overhead_delay_t overhead_delay; - mp_delay_t mp_delay; - scsi_param_t scsi_param; - } c; -} ida_ioctl_t; - -#endif /* IDA_IOCTL_H */ diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 9b180dbbd03c..cc2e71d0a77f 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -173,7 +173,13 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; + if (mtip_check_surprise_removal(dd->pdev)) + return NULL; + rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED); + if (IS_ERR(rq)) + return NULL; + return blk_mq_rq_to_pdu(rq); } @@ -233,15 +239,9 @@ static void mtip_async_complete(struct mtip_port *port, "Command tag %d failed due to TFE\n", tag); } - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, cmd->direction); - rq = mtip_rq_from_tag(dd, tag); - if (unlikely(cmd->unaligned)) - up(&port->cmd_slot_unal); - - blk_mq_end_request(rq, status ? -EIO : 0); + blk_mq_complete_request(rq, status); } /* @@ -581,6 +581,8 @@ static void mtip_completion(struct mtip_port *port, dev_warn(&port->dd->pdev->dev, "Internal command %d completed with TFE\n", tag); + command->comp_func = NULL; + command->comp_data = NULL; complete(waiting); } @@ -618,8 +620,6 @@ static void mtip_handle_tfe(struct driver_data *dd) port = dd->port; - set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); @@ -628,7 +628,7 @@ static void mtip_handle_tfe(struct driver_data *dd) cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, PORT_IRQ_TF_ERR); } - goto handle_tfe_exit; + return; } /* clear the tag accumulator */ @@ -701,7 +701,7 @@ static void mtip_handle_tfe(struct driver_data *dd) fail_reason = "thermal shutdown"; } if (buf[288] == 0xBF) { - set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag); + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag); dev_info(&dd->pdev->dev, "Drive indicates rebuild has failed. Secure erase required.\n"); fail_all_ncq_cmds = 1; @@ -771,11 +771,6 @@ static void mtip_handle_tfe(struct driver_data *dd) } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); - -handle_tfe_exit: - /* clear eh_active */ - clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); } /* @@ -1007,6 +1002,7 @@ static bool mtip_pause_ncq(struct mtip_port *port, (fis->features == 0x27 || fis->features == 0x72 || fis->features == 0x62 || fis->features == 0x26))) { clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); + clear_bit(MTIP_DDF_REBUILD_FAILED_BIT, &port->dd->dd_flag); /* Com reset after secure erase or lowlevel format */ mtip_restart_port(port); clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); @@ -1021,12 +1017,14 @@ static bool mtip_pause_ncq(struct mtip_port *port, * * @port Pointer to port data structure * @timeout Max duration to wait (ms) + * @atomic gfp_t flag to indicate blockable context or not * * return value * 0 Success * -EBUSY Commands still active */ -static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, + gfp_t atomic) { unsigned long to; unsigned int n; @@ -1037,16 +1035,21 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) to = jiffies + msecs_to_jiffies(timeout); do { if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { + test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) && + atomic == GFP_KERNEL) { msleep(20); continue; /* svc thd is actively issuing commands */ } - msleep(100); + if (atomic == GFP_KERNEL) + msleep(100); + else { + cpu_relax(); + udelay(100); + } + if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; - if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) - goto err_fault; /* * Ignore s_active bit 0 of array element 0. @@ -1099,6 +1102,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; int rv = 0; + unsigned long start; /* Make sure the buffer is 8 byte aligned. This is asic specific. */ if (buffer & 0x00000007) { @@ -1107,6 +1111,10 @@ static int mtip_exec_internal_command(struct mtip_port *port, } int_cmd = mtip_get_int_command(dd); + if (!int_cmd) { + dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); + return -EFAULT; + } set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1119,7 +1127,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, if (fis->command != ATA_CMD_STANDBYNOW1) { /* wait for io to complete if non atomic */ if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { + MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) { dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); mtip_put_int_command(dd, int_cmd); @@ -1162,6 +1170,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, /* Populate the command header */ int_cmd->command_header->byte_count = 0; + start = jiffies; + /* Issue the command to the hardware */ mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); @@ -1170,10 +1180,12 @@ static int mtip_exec_internal_command(struct mtip_port *port, if ((rv = wait_for_completion_interruptible_timeout( &wait, msecs_to_jiffies(timeout))) <= 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ dev_err(&dd->pdev->dev, - "Internal command [%02X] was interrupted after %lu ms\n", - fis->command, timeout); + "Internal command [%02X] was interrupted after %u ms\n", + fis->command, + jiffies_to_msecs(jiffies - start)); rv = -EINTR; goto exec_ic_exit; } else if (rv == 0) /* timeout */ @@ -2890,6 +2902,42 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) return -EFAULT; } +static void mtip_softirq_done_fn(struct request *rq) +{ + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct driver_data *dd = rq->q->queuedata; + + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, + cmd->direction); + + if (unlikely(cmd->unaligned)) + up(&dd->port->cmd_slot_unal); + + blk_mq_end_request(rq, rq->errors); +} + +static void mtip_abort_cmd(struct request *req, void *data, + bool reserved) +{ + struct driver_data *dd = data; + + dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); + + clear_bit(req->tag, dd->port->cmds_to_issue); + req->errors = -EIO; + mtip_softirq_done_fn(req); +} + +static void mtip_queue_cmd(struct request *req, void *data, + bool reserved) +{ + struct driver_data *dd = data; + + set_bit(req->tag, dd->port->cmds_to_issue); + blk_abort_request(req); +} + /* * service thread to issue queued commands * @@ -2902,7 +2950,7 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd) static int mtip_service_thread(void *data) { struct driver_data *dd = (struct driver_data *)data; - unsigned long slot, slot_start, slot_wrap; + unsigned long slot, slot_start, slot_wrap, to; unsigned int num_cmd_slots = dd->slot_groups * 32; struct mtip_port *port = dd->port; @@ -2917,9 +2965,7 @@ static int mtip_service_thread(void *data) * is in progress nor error handling is active */ wait_event_interruptible(port->svc_wait, (port->flags) && - !(port->flags & MTIP_PF_PAUSE_IO)); - - set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + (port->flags & MTIP_PF_SVC_THD_WORK)); if (kthread_should_stop() || test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) @@ -2929,6 +2975,8 @@ static int mtip_service_thread(void *data) &dd->dd_flag))) goto st_out; + set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); + restart_eh: /* Demux bits: start with error handling */ if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) { @@ -2939,6 +2987,32 @@ restart_eh: if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) goto restart_eh; + if (test_bit(MTIP_PF_TO_ACTIVE_BIT, &port->flags)) { + to = jiffies + msecs_to_jiffies(5000); + + do { + mdelay(100); + } while (atomic_read(&dd->irq_workers_active) != 0 && + time_before(jiffies, to)); + + if (atomic_read(&dd->irq_workers_active) != 0) + dev_warn(&dd->pdev->dev, + "Completion workers still active!"); + + spin_lock(dd->queue->queue_lock); + blk_mq_all_tag_busy_iter(*dd->tags.tags, + mtip_queue_cmd, dd); + spin_unlock(dd->queue->queue_lock); + + set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags); + + if (mtip_device_reset(dd)) + blk_mq_all_tag_busy_iter(*dd->tags.tags, + mtip_abort_cmd, dd); + + clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags); + } + if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { slot = 1; /* used to restrict the loop to one iteration */ @@ -2971,10 +3045,8 @@ restart_eh: } if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { - if (mtip_ftl_rebuild_poll(dd) < 0) - set_bit(MTIP_DDF_REBUILD_FAILED_BIT, - &dd->dd_flag); - clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); + if (mtip_ftl_rebuild_poll(dd) == 0) + clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); } } @@ -3089,7 +3161,7 @@ static int mtip_hw_get_identify(struct driver_data *dd) if (buf[288] == 0xBF) { dev_info(&dd->pdev->dev, "Drive indicates rebuild has failed.\n"); - /* TODO */ + set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag); } } @@ -3263,20 +3335,25 @@ out1: return rv; } -static void mtip_standby_drive(struct driver_data *dd) +static int mtip_standby_drive(struct driver_data *dd) { - if (dd->sr) - return; + int rv = 0; + if (dd->sr || !dd->port) + return -ENODEV; /* * Send standby immediate (E0h) to the drive so that it * saves its state. */ if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) && - !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) - if (mtip_standby_immediate(dd->port)) + !test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag) && + !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) { + rv = mtip_standby_immediate(dd->port); + if (rv) dev_warn(&dd->pdev->dev, "STANDBY IMMEDIATE failed\n"); + } + return rv; } /* @@ -3289,10 +3366,6 @@ static void mtip_standby_drive(struct driver_data *dd) */ static int mtip_hw_exit(struct driver_data *dd) { - /* - * Send standby immediate (E0h) to the drive so that it - * saves its state. - */ if (!dd->sr) { /* de-initialize the port. */ mtip_deinit_port(dd->port); @@ -3334,8 +3407,7 @@ static int mtip_hw_shutdown(struct driver_data *dd) * Send standby immediate (E0h) to the drive so that it * saves its state. */ - if (!dd->sr && dd->port) - mtip_standby_immediate(dd->port); + mtip_standby_drive(dd); return 0; } @@ -3358,7 +3430,7 @@ static int mtip_hw_suspend(struct driver_data *dd) * Send standby immediate (E0h) to the drive * so that it saves its state. */ - if (mtip_standby_immediate(dd->port) != 0) { + if (mtip_standby_drive(dd) != 0) { dev_err(&dd->pdev->dev, "Failed standby-immediate command\n"); return -EFAULT; @@ -3596,6 +3668,28 @@ static int mtip_block_getgeo(struct block_device *dev, return 0; } +static int mtip_block_open(struct block_device *dev, fmode_t mode) +{ + struct driver_data *dd; + + if (dev && dev->bd_disk) { + dd = (struct driver_data *) dev->bd_disk->private_data; + + if (dd) { + if (test_bit(MTIP_DDF_REMOVAL_BIT, + &dd->dd_flag)) { + return -ENODEV; + } + return 0; + } + } + return -ENODEV; +} + +void mtip_block_release(struct gendisk *disk, fmode_t mode) +{ +} + /* * Block device operation function. * @@ -3603,6 +3697,8 @@ static int mtip_block_getgeo(struct block_device *dev, * layer. */ static const struct block_device_operations mtip_block_ops = { + .open = mtip_block_open, + .release = mtip_block_release, .ioctl = mtip_block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mtip_block_compat_ioctl, @@ -3664,10 +3760,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) rq_data_dir(rq))) { return -ENODATA; } - if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) + if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag) || + test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))) return -ENODATA; - if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) - return -ENXIO; } if (rq->cmd_flags & REQ_DISCARD) { @@ -3779,11 +3874,32 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, return 0; } +static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, + bool reserved) +{ + struct driver_data *dd = req->q->queuedata; + + if (reserved) + goto exit_handler; + + if (test_bit(req->tag, dd->port->cmds_to_issue)) + goto exit_handler; + + if (test_and_set_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags)) + goto exit_handler; + + wake_up_interruptible(&dd->port->svc_wait); +exit_handler: + return BLK_EH_RESET_TIMER; +} + static struct blk_mq_ops mtip_mq_ops = { .queue_rq = mtip_queue_rq, .map_queue = blk_mq_map_queue, .init_request = mtip_init_cmd, .exit_request = mtip_free_cmd, + .complete = mtip_softirq_done_fn, + .timeout = mtip_cmd_timeout, }; /* @@ -3850,7 +3966,6 @@ static int mtip_block_initialize(struct driver_data *dd) mtip_hw_debugfs_init(dd); -skip_create_disk: memset(&dd->tags, 0, sizeof(dd->tags)); dd->tags.ops = &mtip_mq_ops; dd->tags.nr_hw_queues = 1; @@ -3860,12 +3975,13 @@ skip_create_disk: dd->tags.numa_node = dd->numa_node; dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; dd->tags.driver_data = dd; + dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; rv = blk_mq_alloc_tag_set(&dd->tags); if (rv) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); - goto block_queue_alloc_init_error; + goto block_queue_alloc_tag_error; } /* Allocate the request queue. */ @@ -3880,6 +3996,7 @@ skip_create_disk: dd->disk->queue = dd->queue; dd->queue->queuedata = dd; +skip_create_disk: /* Initialize the protocol layer. */ wait_for_rebuild = mtip_hw_get_identify(dd); if (wait_for_rebuild < 0) { @@ -3976,8 +4093,9 @@ kthread_run_error: read_capacity_error: init_hw_cmds_error: blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); block_queue_alloc_init_error: + blk_mq_free_tag_set(&dd->tags); +block_queue_alloc_tag_error: mtip_hw_debugfs_exit(dd); disk_index_error: spin_lock(&rssd_index_lock); @@ -3994,6 +4112,22 @@ protocol_init_error: return rv; } +static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) +{ + struct driver_data *dd = (struct driver_data *)data; + struct mtip_cmd *cmd; + + if (likely(!reserv)) + blk_mq_complete_request(rq, -ENODEV); + else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + + cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); + if (cmd->comp_func) + cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, + cmd, -ENODEV); + } +} + /* * Block layer deinitialization function. * @@ -4025,12 +4159,23 @@ static int mtip_block_remove(struct driver_data *dd) } } - if (!dd->sr) - mtip_standby_drive(dd); + if (!dd->sr) { + /* + * Explicitly wait here for IOs to quiesce, + * as mtip_standby_drive usually won't wait for IOs. + */ + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS, + GFP_KERNEL)) + mtip_standby_drive(dd); + } else dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); + blk_mq_freeze_queue_start(dd->queue); + blk_mq_stop_hw_queues(dd->queue); + blk_mq_all_tag_busy_iter(dd->tags.tags[0], mtip_no_dev_cleanup, dd); + /* * Delete our gendisk structure. This also removes the device * from /dev @@ -4040,7 +4185,8 @@ static int mtip_block_remove(struct driver_data *dd) dd->bdev = NULL; } if (dd->disk) { - del_gendisk(dd->disk); + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); if (dd->disk->queue) { blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); @@ -4081,7 +4227,8 @@ static int mtip_block_shutdown(struct driver_data *dd) dev_info(&dd->pdev->dev, "Shutting down %s ...\n", dd->disk->disk_name); - del_gendisk(dd->disk); + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); if (dd->disk->queue) { blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); @@ -4426,7 +4573,7 @@ static void mtip_pci_remove(struct pci_dev *pdev) struct driver_data *dd = pci_get_drvdata(pdev); unsigned long flags, to; - set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag); spin_lock_irqsave(&dev_lock, flags); list_del_init(&dd->online_list); @@ -4443,12 +4590,17 @@ static void mtip_pci_remove(struct pci_dev *pdev) } while (atomic_read(&dd->irq_workers_active) != 0 && time_before(jiffies, to)); + if (!dd->sr) + fsync_bdev(dd->bdev); + if (atomic_read(&dd->irq_workers_active) != 0) { dev_warn(&dd->pdev->dev, "Completion workers still active!\n"); } - blk_mq_stop_hw_queues(dd->queue); + blk_set_queue_dying(dd->queue); + set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); + /* Clean up the block layer. */ mtip_block_remove(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 3274784008eb..7617888f7944 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -134,16 +134,24 @@ enum { MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */ MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */ MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */ + MTIP_PF_TO_ACTIVE_BIT = 9, /* timeout handling */ MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | (1 << MTIP_PF_EH_ACTIVE_BIT) | (1 << MTIP_PF_SE_ACTIVE_BIT) | - (1 << MTIP_PF_DM_ACTIVE_BIT)), + (1 << MTIP_PF_DM_ACTIVE_BIT) | + (1 << MTIP_PF_TO_ACTIVE_BIT)), MTIP_PF_SVC_THD_ACTIVE_BIT = 4, MTIP_PF_ISSUE_CMDS_BIT = 5, MTIP_PF_REBUILD_BIT = 6, MTIP_PF_SVC_THD_STOP_BIT = 8, + MTIP_PF_SVC_THD_WORK = ((1 << MTIP_PF_EH_ACTIVE_BIT) | + (1 << MTIP_PF_ISSUE_CMDS_BIT) | + (1 << MTIP_PF_REBUILD_BIT) | + (1 << MTIP_PF_SVC_THD_STOP_BIT) | + (1 << MTIP_PF_TO_ACTIVE_BIT)), + /* below are bit numbers in 'dd_flag' defined in driver_data */ MTIP_DDF_SEC_LOCK_BIT = 0, MTIP_DDF_REMOVE_PENDING_BIT = 1, @@ -153,6 +161,7 @@ enum { MTIP_DDF_RESUME_BIT = 6, MTIP_DDF_INIT_DONE_BIT = 7, MTIP_DDF_REBUILD_FAILED_BIT = 8, + MTIP_DDF_REMOVAL_BIT = 9, MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | (1 << MTIP_DDF_SEC_LOCK_BIT) | diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e4c5cc107934..08afbc7a2bb8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -57,10 +57,12 @@ struct nbd_device { int blksize; loff_t bytesize; int xmit_timeout; + bool timedout; bool disconnect; /* a disconnect has been requested by user */ struct timer_list timeout_timer; - spinlock_t tasks_lock; + /* protects initialization and shutdown of the socket */ + spinlock_t sock_lock; struct task_struct *task_recv; struct task_struct *task_send; @@ -98,6 +100,11 @@ static inline struct device *nbd_to_dev(struct nbd_device *nbd) return disk_to_dev(nbd->disk); } +static bool nbd_is_connected(struct nbd_device *nbd) +{ + return !!nbd->task_recv; +} + static const char *nbdcmd_to_ascii(int cmd) { switch (cmd) { @@ -110,6 +117,42 @@ static const char *nbdcmd_to_ascii(int cmd) return "invalid"; } +static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) +{ + bdev->bd_inode->i_size = 0; + set_capacity(nbd->disk, 0); + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + + return 0; +} + +static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) +{ + if (!nbd_is_connected(nbd)) + return; + + bdev->bd_inode->i_size = nbd->bytesize; + set_capacity(nbd->disk, nbd->bytesize >> 9); + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); +} + +static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, + int blocksize, int nr_blocks) +{ + int ret; + + ret = set_blocksize(bdev, blocksize); + if (ret) + return ret; + + nbd->blksize = blocksize; + nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks; + + nbd_size_update(nbd, bdev); + + return 0; +} + static void nbd_end_request(struct nbd_device *nbd, struct request *req) { int error = req->errors ? -EIO : 0; @@ -129,13 +172,20 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req) */ static void sock_shutdown(struct nbd_device *nbd) { - if (!nbd->sock) + spin_lock_irq(&nbd->sock_lock); + + if (!nbd->sock) { + spin_unlock_irq(&nbd->sock_lock); return; + } dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); kernel_sock_shutdown(nbd->sock, SHUT_RDWR); + sockfd_put(nbd->sock); nbd->sock = NULL; - del_timer_sync(&nbd->timeout_timer); + spin_unlock_irq(&nbd->sock_lock); + + del_timer(&nbd->timeout_timer); } static void nbd_xmit_timeout(unsigned long arg) @@ -146,19 +196,16 @@ static void nbd_xmit_timeout(unsigned long arg) if (list_empty(&nbd->queue_head)) return; - nbd->disconnect = true; + spin_lock_irqsave(&nbd->sock_lock, flags); - spin_lock_irqsave(&nbd->tasks_lock, flags); + nbd->timedout = true; - if (nbd->task_recv) - force_sig(SIGKILL, nbd->task_recv); - - if (nbd->task_send) - force_sig(SIGKILL, nbd->task_send); + if (nbd->sock) + kernel_sock_shutdown(nbd->sock, SHUT_RDWR); - spin_unlock_irqrestore(&nbd->tasks_lock, flags); + spin_unlock_irqrestore(&nbd->sock_lock, flags); - dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n"); + dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); } /* @@ -171,7 +218,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, int result; struct msghdr msg; struct kvec iov; - sigset_t blocked, oldset; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -181,11 +227,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, return -EINVAL; } - /* Allow interception of SIGKILL only - * Don't allow other signals to interrupt the transmission */ - siginitsetinv(&blocked, sigmask(SIGKILL)); - sigprocmask(SIG_SETMASK, &blocked, &oldset); - current->flags |= PF_MEMALLOC; do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; @@ -212,7 +253,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, buf += result; } while (size > 0); - sigprocmask(SIG_SETMASK, &oldset, NULL); tsk_restore_flags(current, pflags, PF_MEMALLOC); if (!send && nbd->xmit_timeout) @@ -402,31 +442,28 @@ static struct device_attribute pid_attr = { .show = pid_show, }; -static int nbd_thread_recv(struct nbd_device *nbd) +static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) { struct request *req; int ret; - unsigned long flags; BUG_ON(nbd->magic != NBD_MAGIC); sk_set_memalloc(nbd->sock->sk); - spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = current; - spin_unlock_irqrestore(&nbd->tasks_lock, flags); ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = NULL; - spin_unlock_irqrestore(&nbd->tasks_lock, flags); return ret; } + nbd_size_update(nbd, bdev); + while (1) { req = nbd_read_stat(nbd); if (IS_ERR(req)) { @@ -437,21 +474,11 @@ static int nbd_thread_recv(struct nbd_device *nbd) nbd_end_request(nbd, req); } + nbd_size_clear(nbd, bdev); + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = NULL; - spin_unlock_irqrestore(&nbd->tasks_lock, flags); - - if (signal_pending(current)) { - ret = kernel_dequeue_signal(NULL); - dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", - task_pid_nr(current), current->comm, ret); - mutex_lock(&nbd->tx_lock); - sock_shutdown(nbd); - mutex_unlock(&nbd->tx_lock); - ret = -ETIMEDOUT; - } return ret; } @@ -544,11 +571,8 @@ static int nbd_thread_send(void *data) { struct nbd_device *nbd = data; struct request *req; - unsigned long flags; - spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_send = current; - spin_unlock_irqrestore(&nbd->tasks_lock, flags); set_user_nice(current, MIN_NICE); while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { @@ -557,17 +581,6 @@ static int nbd_thread_send(void *data) kthread_should_stop() || !list_empty(&nbd->waiting_queue)); - if (signal_pending(current)) { - int ret = kernel_dequeue_signal(NULL); - - dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", - task_pid_nr(current), current->comm, ret); - mutex_lock(&nbd->tx_lock); - sock_shutdown(nbd); - mutex_unlock(&nbd->tx_lock); - break; - } - /* extract request */ if (list_empty(&nbd->waiting_queue)) continue; @@ -582,13 +595,7 @@ static int nbd_thread_send(void *data) nbd_handle_req(nbd, req); } - spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_send = NULL; - spin_unlock_irqrestore(&nbd->tasks_lock, flags); - - /* Clear maybe pending signals */ - if (signal_pending(current)) - kernel_dequeue_signal(NULL); return 0; } @@ -618,8 +625,8 @@ static void nbd_request_handler(struct request_queue *q) req, req->cmd_type); if (unlikely(!nbd->sock)) { - dev_err(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on closed socket\n"); req->errors++; nbd_end_request(nbd, req); spin_lock_irq(q->queue_lock); @@ -636,6 +643,61 @@ static void nbd_request_handler(struct request_queue *q) } } +static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock) +{ + int ret = 0; + + spin_lock_irq(&nbd->sock_lock); + + if (nbd->sock) { + ret = -EBUSY; + goto out; + } + + nbd->sock = sock; + +out: + spin_unlock_irq(&nbd->sock_lock); + + return ret; +} + +/* Reset all properties of an NBD device */ +static void nbd_reset(struct nbd_device *nbd) +{ + nbd->disconnect = false; + nbd->timedout = false; + nbd->blksize = 1024; + nbd->bytesize = 0; + set_capacity(nbd->disk, 0); + nbd->flags = 0; + nbd->xmit_timeout = 0; + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + del_timer_sync(&nbd->timeout_timer); +} + +static void nbd_bdev_reset(struct block_device *bdev) +{ + set_device_ro(bdev, false); + bdev->bd_inode->i_size = 0; + if (max_part > 0) { + blkdev_reread_part(bdev); + bdev->bd_invalidated = 1; + } +} + +static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) +{ + if (nbd->flags & NBD_FLAG_READ_ONLY) + set_device_ro(bdev, true); + if (nbd->flags & NBD_FLAG_SEND_TRIM) + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + if (nbd->flags & NBD_FLAG_SEND_FLUSH) + blk_queue_flush(nbd->disk->queue, REQ_FLUSH); + else + blk_queue_flush(nbd->disk->queue, 0); +} + static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); @@ -668,48 +730,40 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; } - case NBD_CLEAR_SOCK: { - struct socket *sock = nbd->sock; - nbd->sock = NULL; + case NBD_CLEAR_SOCK: + sock_shutdown(nbd); nbd_clear_que(nbd); BUG_ON(!list_empty(&nbd->queue_head)); BUG_ON(!list_empty(&nbd->waiting_queue)); kill_bdev(bdev); - if (sock) - sockfd_put(sock); return 0; - } case NBD_SET_SOCK: { - struct socket *sock; int err; - if (nbd->sock) - return -EBUSY; - sock = sockfd_lookup(arg, &err); - if (sock) { - nbd->sock = sock; - if (max_part > 0) - bdev->bd_invalidated = 1; - nbd->disconnect = false; /* we're connected now */ - return 0; - } - return -EINVAL; + struct socket *sock = sockfd_lookup(arg, &err); + + if (!sock) + return err; + + err = nbd_set_socket(nbd, sock); + if (!err && max_part) + bdev->bd_invalidated = 1; + + return err; } - case NBD_SET_BLKSIZE: - nbd->blksize = arg; - nbd->bytesize &= ~(nbd->blksize-1); - bdev->bd_inode->i_size = nbd->bytesize; - set_blocksize(bdev, nbd->blksize); - set_capacity(nbd->disk, nbd->bytesize >> 9); - return 0; + case NBD_SET_BLKSIZE: { + loff_t bsize = div_s64(nbd->bytesize, arg); + + return nbd_size_set(nbd, bdev, arg, bsize); + } case NBD_SET_SIZE: - nbd->bytesize = arg & ~(nbd->blksize-1); - bdev->bd_inode->i_size = nbd->bytesize; - set_blocksize(bdev, nbd->blksize); - set_capacity(nbd->disk, nbd->bytesize >> 9); - return 0; + return nbd_size_set(nbd, bdev, nbd->blksize, + arg / nbd->blksize); + + case NBD_SET_SIZE_BLOCKS: + return nbd_size_set(nbd, bdev, nbd->blksize, arg); case NBD_SET_TIMEOUT: nbd->xmit_timeout = arg * HZ; @@ -725,16 +779,8 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd->flags = arg; return 0; - case NBD_SET_SIZE_BLOCKS: - nbd->bytesize = ((u64) arg) * nbd->blksize; - bdev->bd_inode->i_size = nbd->bytesize; - set_blocksize(bdev, nbd->blksize); - set_capacity(nbd->disk, nbd->bytesize >> 9); - return 0; - case NBD_DO_IT: { struct task_struct *thread; - struct socket *sock; int error; if (nbd->task_recv) @@ -744,15 +790,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, mutex_unlock(&nbd->tx_lock); - if (nbd->flags & NBD_FLAG_READ_ONLY) - set_device_ro(bdev, true); - if (nbd->flags & NBD_FLAG_SEND_TRIM) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, - nbd->disk->queue); - if (nbd->flags & NBD_FLAG_SEND_FLUSH) - blk_queue_flush(nbd->disk->queue, REQ_FLUSH); - else - blk_queue_flush(nbd->disk->queue, 0); + nbd_parse_flags(nbd, bdev); thread = kthread_run(nbd_thread_send, nbd, "%s", nbd_name(nbd)); @@ -762,29 +800,24 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, } nbd_dev_dbg_init(nbd); - error = nbd_thread_recv(nbd); + error = nbd_thread_recv(nbd, bdev); nbd_dev_dbg_close(nbd); kthread_stop(thread); mutex_lock(&nbd->tx_lock); sock_shutdown(nbd); - sock = nbd->sock; - nbd->sock = NULL; nbd_clear_que(nbd); kill_bdev(bdev); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); - set_device_ro(bdev, false); - if (sock) - sockfd_put(sock); - nbd->flags = 0; - nbd->bytesize = 0; - bdev->bd_inode->i_size = 0; - set_capacity(nbd->disk, 0); - if (max_part > 0) - blkdev_reread_part(bdev); + nbd_bdev_reset(bdev); + if (nbd->disconnect) /* user requested, ignore socket errors */ - return 0; + error = 0; + if (nbd->timedout) + error = -ETIMEDOUT; + + nbd_reset(nbd); + return error; } @@ -892,50 +925,23 @@ static const struct file_operations nbd_dbg_flags_ops = { static int nbd_dev_dbg_init(struct nbd_device *nbd) { struct dentry *dir; - struct dentry *f; + + if (!nbd_dbg_dir) + return -EIO; dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir); - if (IS_ERR_OR_NULL(dir)) { - dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s' (%ld)\n", - nbd_name(nbd), PTR_ERR(dir)); - return PTR_ERR(dir); + if (!dir) { + dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n", + nbd_name(nbd)); + return -EIO; } nbd->dbg_dir = dir; - f = debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); - if (IS_ERR_OR_NULL(f)) { - dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'tasks', %ld\n", - PTR_ERR(f)); - return PTR_ERR(f); - } - - f = debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); - if (IS_ERR_OR_NULL(f)) { - dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'size_bytes', %ld\n", - PTR_ERR(f)); - return PTR_ERR(f); - } - - f = debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout); - if (IS_ERR_OR_NULL(f)) { - dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'timeout', %ld\n", - PTR_ERR(f)); - return PTR_ERR(f); - } - - f = debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize); - if (IS_ERR_OR_NULL(f)) { - dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'blocksize', %ld\n", - PTR_ERR(f)); - return PTR_ERR(f); - } - - f = debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops); - if (IS_ERR_OR_NULL(f)) { - dev_err(nbd_to_dev(nbd), "Failed to create debugfs file 'flags', %ld\n", - PTR_ERR(f)); - return PTR_ERR(f); - } + debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); + debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); + debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout); + debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize); + debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops); return 0; } @@ -950,8 +956,8 @@ static int nbd_dbg_init(void) struct dentry *dbg_dir; dbg_dir = debugfs_create_dir("nbd", NULL); - if (IS_ERR(dbg_dir)) - return PTR_ERR(dbg_dir); + if (!dbg_dir) + return -EIO; nbd_dbg_dir = dbg_dir; @@ -1069,7 +1075,7 @@ static int __init nbd_init(void) nbd_dev[i].magic = NBD_MAGIC; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); - spin_lock_init(&nbd_dev[i].tasks_lock); + spin_lock_init(&nbd_dev[i].sock_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); mutex_init(&nbd_dev[i].tx_lock); init_timer(&nbd_dev[i].timeout_timer); @@ -1077,14 +1083,12 @@ static int __init nbd_init(void) nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i]; init_waitqueue_head(&nbd_dev[i].active_wq); init_waitqueue_head(&nbd_dev[i].waiting_wq); - nbd_dev[i].blksize = 1024; - nbd_dev[i].bytesize = 0; disk->major = NBD_MAJOR; disk->first_minor = i << part_shift; disk->fops = &nbd_fops; disk->private_data = &nbd_dev[i]; sprintf(disk->disk_name, "nbd%d", i); - set_capacity(disk, 0); + nbd_reset(&nbd_dev[i]); add_disk(disk); } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 876763f7f13e..26aa080e243c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -23,8 +23,7 @@ #include <xen/grant_table.h> #include "common.h" -/* Enlarge the array size in order to fully show blkback name. */ -#define BLKBACK_NAME_LEN (20) +/* On the XenBus the max length of 'ring-ref%u'. */ #define RINGREF_NAME_LEN (20) struct backend_info { @@ -76,7 +75,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf) else devname = devpath; - snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname); + snprintf(buf, TASK_COMM_LEN, "%d.%s", blkif->domid, devname); kfree(devpath); return 0; @@ -85,7 +84,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf) static void xen_update_blkif_status(struct xen_blkif *blkif) { int err; - char name[BLKBACK_NAME_LEN]; + char name[TASK_COMM_LEN]; struct xen_blkif_ring *ring; int i; @@ -618,6 +617,14 @@ static int xen_blkbk_probe(struct xenbus_device *dev, goto fail; } + err = xenbus_printf(XBT_NIL, dev->nodename, + "feature-max-indirect-segments", "%u", + MAX_INDIRECT_SEGMENTS); + if (err) + dev_warn(&dev->dev, + "writing %s/feature-max-indirect-segments (%d)", + dev->nodename, err); + /* Multi-queue: advertise how many queues are supported by us.*/ err = xenbus_printf(XBT_NIL, dev->nodename, "multi-queue-max-queues", "%u", xenblk_max_queues); @@ -849,11 +856,6 @@ again: dev->nodename); goto abort; } - err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u", - MAX_INDIRECT_SEGMENTS); - if (err) - dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)", - dev->nodename, err); err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", (unsigned long long)vbd_sz(&be->blkif->vbd)); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 83eb9e6bf8b0..6405b6557792 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -125,8 +125,10 @@ static const struct block_device_operations xlvbd_block_fops; */ static unsigned int xen_blkif_max_segments = 32; -module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); -MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); +module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, + S_IRUGO); +MODULE_PARM_DESC(max_indirect_segments, + "Maximum amount of segments in indirect requests (default is 32)"); static unsigned int xen_blkif_max_queues = 4; module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO); diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 9f6acd5d1d2e..0d1fb6b40c46 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -250,7 +250,7 @@ int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, return 0; } - plane_cnt = (1 << dev->plane_mode); + plane_cnt = dev->plane_mode; rqd->nr_pages = plane_cnt * nr_ppas; if (dev->ops->max_phys_sect < rqd->nr_pages) @@ -463,11 +463,7 @@ static int nvm_core_init(struct nvm_dev *dev) dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun; dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls; - dev->total_blocks = dev->nr_planes * - dev->blks_per_lun * - dev->luns_per_chnl * - dev->nr_chnls; - dev->total_pages = dev->total_blocks * dev->pgs_per_blk; + dev->total_secs = dev->nr_luns * dev->sec_per_lun; INIT_LIST_HEAD(&dev->online_targets); mutex_init(&dev->mlock); @@ -872,20 +868,19 @@ static int nvm_configure_by_str_event(const char *val, static int nvm_configure_get(char *buf, const struct kernel_param *kp) { - int sz = 0; - char *buf_start = buf; + int sz; struct nvm_dev *dev; - buf += sprintf(buf, "available devices:\n"); + sz = sprintf(buf, "available devices:\n"); down_write(&nvm_lock); list_for_each_entry(dev, &nvm_devices, devices) { - if (sz > 4095 - DISK_NAME_LEN) + if (sz > 4095 - DISK_NAME_LEN - 2) break; - buf += sprintf(buf, " %32s\n", dev->name); + sz += sprintf(buf + sz, " %32s\n", dev->name); } up_write(&nvm_lock); - return buf - buf_start - 1; + return sz; } static const struct kernel_param_ops nvm_configure_by_str_event_param_ops = { diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index 7fb725b16148..d65ec36a2231 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -100,14 +100,13 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) { struct nvm_dev *dev = private; struct gen_nvm *gn = dev->mp; - sector_t max_pages = dev->total_pages * (dev->sec_size >> 9); u64 elba = slba + nlb; struct gen_lun *lun; struct nvm_block *blk; u64 i; int lun_id; - if (unlikely(elba > dev->total_pages)) { + if (unlikely(elba > dev->total_secs)) { pr_err("gennvm: L2P data from device is out of bounds!\n"); return -EINVAL; } @@ -115,7 +114,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) for (i = 0; i < nlb; i++) { u64 pba = le64_to_cpu(entries[i]); - if (unlikely(pba >= max_pages && pba != U64_MAX)) { + if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) { pr_err("gennvm: L2P data entry is out of bounds!\n"); return -EINVAL; } @@ -197,7 +196,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) } if (dev->ops->get_l2p_tbl) { - ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages, + ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, gennvm_block_map, dev); if (ret) { pr_err("gennvm: could not read L2P table.\n"); diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 307db1ea22de..82343783aa47 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -38,7 +38,7 @@ static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a) spin_lock(&rblk->lock); - div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset); + div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset); WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages)); rblk->nr_invalid_pages++; @@ -113,14 +113,24 @@ static void rrpc_discard(struct rrpc *rrpc, struct bio *bio) static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk) { - return (rblk->next_page == rrpc->dev->pgs_per_blk); + return (rblk->next_page == rrpc->dev->sec_per_blk); } +/* Calculate relative addr for the given block, considering instantiated LUNs */ +static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk) +{ + struct nvm_block *blk = rblk->parent; + int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns); + + return lun_blk * rrpc->dev->sec_per_blk; +} + +/* Calculate global addr for the given block */ static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk) { struct nvm_block *blk = rblk->parent; - return blk->id * rrpc->dev->pgs_per_blk; + return blk->id * rrpc->dev->sec_per_blk; } static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, @@ -136,7 +146,7 @@ static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, l.g.sec = secs; sector_div(ppa, dev->sec_per_pg); - div_u64_rem(ppa, dev->sec_per_blk, &pgs); + div_u64_rem(ppa, dev->pgs_per_blk, &pgs); l.g.pg = pgs; sector_div(ppa, dev->pgs_per_blk); @@ -191,12 +201,12 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, return NULL; } - rblk = &rlun->blocks[blk->id]; + rblk = rrpc_get_rblk(rlun, blk->id); list_add_tail(&rblk->list, &rlun->open_list); spin_unlock(&lun->lock); blk->priv = rblk; - bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk); + bitmap_zero(rblk->invalid_pages, rrpc->dev->sec_per_blk); rblk->next_page = 0; rblk->nr_invalid_pages = 0; atomic_set(&rblk->data_cmnt_size, 0); @@ -286,11 +296,11 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) struct bio *bio; struct page *page; int slot; - int nr_pgs_per_blk = rrpc->dev->pgs_per_blk; + int nr_sec_per_blk = rrpc->dev->sec_per_blk; u64 phys_addr; DECLARE_COMPLETION_ONSTACK(wait); - if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) + if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) return 0; bio = bio_alloc(GFP_NOIO, 1); @@ -306,10 +316,10 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) } while ((slot = find_first_zero_bit(rblk->invalid_pages, - nr_pgs_per_blk)) < nr_pgs_per_blk) { + nr_sec_per_blk)) < nr_sec_per_blk) { /* Lock laddr */ - phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot; + phys_addr = rblk->parent->id * nr_sec_per_blk + slot; try: spin_lock(&rrpc->rev_lock); @@ -381,7 +391,7 @@ finished: mempool_free(page, rrpc->page_pool); bio_put(bio); - if (!bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) { + if (!bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) { pr_err("nvm: failed to garbage collect block\n"); return -EIO; } @@ -499,12 +509,21 @@ static void rrpc_gc_queue(struct work_struct *work) struct rrpc *rrpc = gcb->rrpc; struct rrpc_block *rblk = gcb->rblk; struct nvm_lun *lun = rblk->parent->lun; + struct nvm_block *blk = rblk->parent; struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; spin_lock(&rlun->lock); list_add_tail(&rblk->prio, &rlun->prio_list); spin_unlock(&rlun->lock); + spin_lock(&lun->lock); + lun->nr_open_blocks--; + lun->nr_closed_blocks++; + blk->state &= ~NVM_BLK_ST_OPEN; + blk->state |= NVM_BLK_ST_CLOSED; + list_move_tail(&rblk->list, &rlun->closed_list); + spin_unlock(&lun->lock); + mempool_free(gcb, rrpc->gcb_pool); pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", rblk->parent->id); @@ -545,7 +564,7 @@ static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, struct rrpc_addr *gp; struct rrpc_rev_addr *rev; - BUG_ON(laddr >= rrpc->nr_pages); + BUG_ON(laddr >= rrpc->nr_sects); gp = &rrpc->trans_map[laddr]; spin_lock(&rrpc->rev_lock); @@ -668,20 +687,8 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, lun = rblk->parent->lun; cmnt_size = atomic_inc_return(&rblk->data_cmnt_size); - if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) { - struct nvm_block *blk = rblk->parent; - struct rrpc_lun *rlun = rblk->rlun; - - spin_lock(&lun->lock); - lun->nr_open_blocks--; - lun->nr_closed_blocks++; - blk->state &= ~NVM_BLK_ST_OPEN; - blk->state |= NVM_BLK_ST_CLOSED; - list_move_tail(&rblk->list, &rlun->closed_list); - spin_unlock(&lun->lock); - + if (unlikely(cmnt_size == rrpc->dev->sec_per_blk)) rrpc_run_gc(rrpc, rblk); - } } } @@ -726,7 +733,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, for (i = 0; i < npages; i++) { /* We assume that mapping occurs at 4KB granularity */ - BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_pages)); + BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr + i]; if (gp->rblk) { @@ -757,7 +764,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) return NVM_IO_REQUEUE; - BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_pages)); + BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects)); gp = &rrpc->trans_map[laddr]; if (gp->rblk) { @@ -1007,21 +1014,21 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) struct nvm_dev *dev = rrpc->dev; struct rrpc_addr *addr = rrpc->trans_map + slba; struct rrpc_rev_addr *raddr = rrpc->rev_trans_map; - sector_t max_pages = dev->total_pages * (dev->sec_size >> 9); u64 elba = slba + nlb; u64 i; - if (unlikely(elba > dev->total_pages)) { + if (unlikely(elba > dev->total_secs)) { pr_err("nvm: L2P data from device is out of bounds!\n"); return -EINVAL; } for (i = 0; i < nlb; i++) { u64 pba = le64_to_cpu(entries[i]); + unsigned int mod; /* LNVM treats address-spaces as silos, LBA and PBA are * equally large and zero-indexed. */ - if (unlikely(pba >= max_pages && pba != U64_MAX)) { + if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) { pr_err("nvm: L2P data entry is out of bounds!\n"); return -EINVAL; } @@ -1033,8 +1040,10 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) if (!pba) continue; + div_u64_rem(pba, rrpc->nr_sects, &mod); + addr[i].addr = pba; - raddr[pba].addr = slba + i; + raddr[mod].addr = slba + i; } return 0; @@ -1046,16 +1055,16 @@ static int rrpc_map_init(struct rrpc *rrpc) sector_t i; int ret; - rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages); + rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects); if (!rrpc->trans_map) return -ENOMEM; rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) - * rrpc->nr_pages); + * rrpc->nr_sects); if (!rrpc->rev_trans_map) return -ENOMEM; - for (i = 0; i < rrpc->nr_pages; i++) { + for (i = 0; i < rrpc->nr_sects; i++) { struct rrpc_addr *p = &rrpc->trans_map[i]; struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i]; @@ -1067,8 +1076,8 @@ static int rrpc_map_init(struct rrpc *rrpc) return 0; /* Bring up the mapping table from device */ - ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages, - rrpc_l2p_update, rrpc); + ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update, + rrpc); if (ret) { pr_err("nvm: rrpc: could not read L2P table.\n"); return -EINVAL; @@ -1141,7 +1150,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) struct rrpc_lun *rlun; int i, j; - if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { + if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { pr_err("rrpc: number of pages per block too high."); return -EINVAL; } @@ -1168,7 +1177,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) spin_lock_init(&rlun->lock); rrpc->total_blocks += dev->blks_per_lun; - rrpc->nr_pages += dev->sec_per_lun; + rrpc->nr_sects += dev->sec_per_lun; rlun->blocks = vzalloc(sizeof(struct rrpc_block) * rrpc->dev->blks_per_lun); @@ -1221,9 +1230,9 @@ static sector_t rrpc_capacity(void *private) /* cur, gc, and two emergency blocks for each lun */ reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4; - provisioned = rrpc->nr_pages - reserved; + provisioned = rrpc->nr_sects - reserved; - if (reserved > rrpc->nr_pages) { + if (reserved > rrpc->nr_sects) { pr_err("rrpc: not enough space available to expose storage.\n"); return 0; } @@ -1242,10 +1251,11 @@ static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk) struct nvm_dev *dev = rrpc->dev; int offset; struct rrpc_addr *laddr; - u64 paddr, pladdr; + u64 bpaddr, paddr, pladdr; - for (offset = 0; offset < dev->pgs_per_blk; offset++) { - paddr = block_to_addr(rrpc, rblk) + offset; + bpaddr = block_to_rel_addr(rrpc, rblk); + for (offset = 0; offset < dev->sec_per_blk; offset++) { + paddr = bpaddr + offset; pladdr = rrpc->rev_trans_map[paddr].addr; if (pladdr == ADDR_EMPTY) @@ -1386,7 +1396,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n", - rrpc->nr_luns, (unsigned long long)rrpc->nr_pages); + rrpc->nr_luns, (unsigned long long)rrpc->nr_sects); mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10)); diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index f7b37336353f..855f4a5ca7dd 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -104,7 +104,7 @@ struct rrpc { struct rrpc_lun *luns; /* calculated values */ - unsigned long long nr_pages; + unsigned long long nr_sects; unsigned long total_blocks; /* Write strategy variables. Move these into each for structure for each @@ -156,6 +156,15 @@ struct rrpc_rev_addr { u64 addr; }; +static inline struct rrpc_block *rrpc_get_rblk(struct rrpc_lun *rlun, + int blk_id) +{ + struct rrpc *rrpc = rlun->rrpc; + int lun_blk = blk_id % rrpc->dev->blks_per_lun; + + return &rlun->blocks[lun_blk]; +} + static inline sector_t rrpc_get_laddr(struct bio *bio) { return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; @@ -206,7 +215,7 @@ static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, unsigned pages, struct rrpc_inflight_rq *r) { - BUG_ON((laddr + pages) > rrpc->nr_pages); + BUG_ON((laddr + pages) > rrpc->nr_sects); return __rrpc_lock_laddr(rrpc, laddr, pages, r); } @@ -243,7 +252,7 @@ static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd) struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); uint8_t pages = rqd->nr_pages; - BUG_ON((r->l_start + pages) > rrpc->nr_pages); + BUG_ON((r->l_start + pages) > rrpc->nr_sects); rrpc_unlock_laddr(rrpc, r); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8d0ead98eb6e..a296425a7270 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1015,8 +1015,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) */ atomic_set(&dc->count, 1); - if (bch_cached_dev_writeback_start(dc)) + /* Block writeback thread, but spawn it */ + down_write(&dc->writeback_lock); + if (bch_cached_dev_writeback_start(dc)) { + up_write(&dc->writeback_lock); return -ENOMEM; + } if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { bch_sectors_dirty_init(dc); @@ -1028,6 +1032,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) bch_cached_dev_run(dc); bcache_device_link(&dc->disk, c, "bdev"); + /* Allow the writeback thread to proceed */ + up_write(&dc->writeback_lock); + pr_info("Caching %s as %s on set %pU", bdevname(dc->bdev, buf), dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); @@ -1366,6 +1373,9 @@ static void cache_set_flush(struct closure *cl) struct btree *b; unsigned i; + if (!c) + closure_return(cl); + bch_cache_accounting_destroy(&c->accounting); kobject_put(&c->internal); @@ -1828,11 +1838,12 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) return 0; } -static void register_cache(struct cache_sb *sb, struct page *sb_page, +static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; - const char *err = "cannot allocate memory"; + const char *err = NULL; + int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; @@ -1847,27 +1858,35 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, if (blk_queue_discard(bdev_get_queue(ca->bdev))) ca->discard = CACHE_DISCARD(&ca->sb); - if (cache_alloc(sb, ca) != 0) + ret = cache_alloc(sb, ca); + if (ret != 0) goto err; - err = "error creating kobject"; - if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) - goto err; + if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { + err = "error calling kobject_add"; + ret = -ENOMEM; + goto out; + } mutex_lock(&bch_register_lock); err = register_cache_set(ca); mutex_unlock(&bch_register_lock); - if (err) - goto err; + if (err) { + ret = -ENODEV; + goto out; + } pr_info("registered cache device %s", bdevname(bdev, name)); + out: kobject_put(&ca->kobj); - return; + err: - pr_notice("error opening %s: %s", bdevname(bdev, name), err); - goto out; + if (err) + pr_notice("error opening %s: %s", bdevname(bdev, name), err); + + return ret; } /* Global interfaces/init */ @@ -1965,7 +1984,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!ca) goto err_close; - register_cache(sb, sb_page, bdev, ca); + if (register_cache(sb, sb_page, bdev, ca) != 0) + goto err_close; } out: if (sb_page) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b586d84f2518..c894841c6456 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -1,6 +1,10 @@ +config NVME_CORE + tristate + config BLK_DEV_NVME tristate "NVM Express block device" depends on PCI && BLOCK + select NVME_CORE ---help--- The NVM Express driver is for solid state drives directly connected to the PCI or PCI Express bus. If you know you @@ -11,7 +15,7 @@ config BLK_DEV_NVME config BLK_DEV_NVME_SCSI bool "SCSI emulation for NVMe device nodes" - depends on BLK_DEV_NVME + depends on NVME_CORE ---help--- This adds support for the SG_IO ioctl on the NVMe character and block devices nodes, as well a a translation for a small diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 51bf90871549..9a3ca892b4a7 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -1,6 +1,8 @@ +obj-$(CONFIG_NVME_CORE) += nvme-core.o +obj-$(CONFIG_BLK_DEV_NVME) += nvme.o -obj-$(CONFIG_BLK_DEV_NVME) += nvme.o +nvme-core-y := core.o +nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o +nvme-core-$(CONFIG_NVM) += lightnvm.o -lightnvm-$(CONFIG_NVM) := lightnvm.o -nvme-y += core.o pci.o $(lightnvm-y) -nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o +nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 03c46412fff4..643f457131c2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -33,6 +33,20 @@ #define NVME_MINORS (1U << MINORBITS) +unsigned char admin_timeout = 60; +module_param(admin_timeout, byte, 0644); +MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); +EXPORT_SYMBOL_GPL(admin_timeout); + +unsigned char nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, byte, 0644); +MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); +EXPORT_SYMBOL_GPL(nvme_io_timeout); + +unsigned char shutdown_timeout = 5; +module_param(shutdown_timeout, byte, 0644); +MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); + static int nvme_major; module_param(nvme_major, int, 0); @@ -40,7 +54,7 @@ static int nvme_char_major; module_param(nvme_char_major, int, 0); static LIST_HEAD(nvme_ctrl_list); -DEFINE_SPINLOCK(dev_list_lock); +static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; @@ -72,11 +86,21 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) spin_lock(&dev_list_lock); ns = disk->private_data; - if (ns && !kref_get_unless_zero(&ns->kref)) - ns = NULL; + if (ns) { + if (!kref_get_unless_zero(&ns->kref)) + goto fail; + if (!try_module_get(ns->ctrl->ops->module)) + goto fail_put_ns; + } spin_unlock(&dev_list_lock); return ns; + +fail_put_ns: + kref_put(&ns->kref, nvme_free_ns); +fail: + spin_unlock(&dev_list_lock); + return NULL; } void nvme_requeue_req(struct request *req) @@ -89,6 +113,7 @@ void nvme_requeue_req(struct request *req) blk_mq_kick_requeue_list(req->q); spin_unlock_irqrestore(req->q->queue_lock, flags); } +EXPORT_SYMBOL_GPL(nvme_requeue_req); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags) @@ -108,17 +133,18 @@ struct request *nvme_alloc_request(struct request_queue *q, req->cmd = (unsigned char *)cmd; req->cmd_len = sizeof(struct nvme_command); - req->special = (void *)0; return req; } +EXPORT_SYMBOL_GPL(nvme_alloc_request); /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buffer, unsigned bufflen, u32 *result, unsigned timeout) + struct nvme_completion *cqe, void *buffer, unsigned bufflen, + unsigned timeout) { struct request *req; int ret; @@ -128,6 +154,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + req->special = cqe; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); @@ -136,8 +163,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } blk_execute_rq(req->q, NULL, req, 0); - if (result) - *result = (u32)(uintptr_t)req->special; ret = req->errors; out: blk_mq_free_request(req); @@ -147,8 +172,9 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { - return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0); + return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0); } +EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, @@ -156,6 +182,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, u32 *result, unsigned timeout) { bool write = cmd->common.opcode & 1; + struct nvme_completion cqe; struct nvme_ns *ns = q->queuedata; struct gendisk *disk = ns ? ns->disk : NULL; struct request *req; @@ -168,6 +195,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + req->special = &cqe; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, @@ -222,7 +250,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, disk, req, 0); ret = req->errors; if (result) - *result = (u32)(uintptr_t)req->special; + *result = le32_to_cpu(cqe.result); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -303,6 +331,8 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, dma_addr_t dma_addr, u32 *result) { struct nvme_command c; + struct nvme_completion cqe; + int ret; memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; @@ -310,13 +340,18 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0); + if (ret >= 0) + *result = le32_to_cpu(cqe.result); + return ret; } int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, dma_addr_t dma_addr, u32 *result) { struct nvme_command c; + struct nvme_completion cqe; + int ret; memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; @@ -324,7 +359,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0); + if (ret >= 0) + *result = le32_to_cpu(cqe.result); + return ret; } int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log) @@ -364,6 +402,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) *count = min(*count, nr_io_queues); return 0; } +EXPORT_SYMBOL_GPL(nvme_set_queue_count); static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { @@ -504,7 +543,10 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) static void nvme_release(struct gendisk *disk, fmode_t mode) { - nvme_put_ns(disk->private_data); + struct nvme_ns *ns = disk->private_data; + + module_put(ns->ctrl->ops->module); + nvme_put_ns(ns); } static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -545,8 +587,14 @@ static void nvme_init_integrity(struct nvme_ns *ns) static void nvme_config_discard(struct nvme_ns *ns) { + struct nvme_ctrl *ctrl = ns->ctrl; u32 logical_block_size = queue_logical_block_size(ns->queue); - ns->queue->limits.discard_zeroes_data = 0; + + if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) + ns->queue->limits.discard_zeroes_data = 1; + else + ns->queue->limits.discard_zeroes_data = 0; + ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, 0xffffffff); @@ -566,8 +614,8 @@ static int nvme_revalidate_disk(struct gendisk *disk) return -ENODEV; } if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { - dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", - __func__, ns->ctrl->instance, ns->ns_id); + dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n", + __func__); return -ENODEV; } if (id->ncap == 0) { @@ -577,7 +625,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { if (nvme_nvm_register(ns->queue, disk->disk_name)) { - dev_warn(ns->ctrl->dev, + dev_warn(disk_to_dev(ns->disk), "%s: LightNVM init failure\n", __func__); kfree(id); return -ENODEV; @@ -750,7 +798,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(ctrl->dev, + dev_err(ctrl->device, "Device not ready; aborting %s\n", enabled ? "initialisation" : "reset"); return -ENODEV; @@ -778,6 +826,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) return ret; return nvme_wait_ready(ctrl, cap, false); } +EXPORT_SYMBOL_GPL(nvme_disable_ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) { @@ -790,7 +839,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) int ret; if (page_shift < dev_page_min) { - dev_err(ctrl->dev, + dev_err(ctrl->device, "Minimum device page size %u too large for host (%u)\n", 1 << dev_page_min, 1 << page_shift); return -ENODEV; @@ -809,6 +858,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) return ret; return nvme_wait_ready(ctrl, cap, true); } +EXPORT_SYMBOL_GPL(nvme_enable_ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) { @@ -831,7 +881,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) if (fatal_signal_pending(current)) return -EINTR; if (time_after(jiffies, timeout)) { - dev_err(ctrl->dev, + dev_err(ctrl->device, "Device shutdown incomplete; abort shutdown\n"); return -ENODEV; } @@ -839,6 +889,7 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) return ret; } +EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl); static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, struct request_queue *q) @@ -870,13 +921,13 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); if (ret) { - dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret); + dev_err(ctrl->device, "Reading VS failed (%d)\n", ret); return ret; } ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap); if (ret) { - dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret); + dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret); return ret; } page_shift = NVME_CAP_MPSMIN(cap) + 12; @@ -886,13 +937,15 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ret = nvme_identify_ctrl(ctrl, &id); if (ret) { - dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret); + dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret); return -EIO; } + ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; + ctrl->cntlid = le16_to_cpup(&id->cntlid); memcpy(ctrl->serial, id->sn, sizeof(id->sn)); memcpy(ctrl->model, id->mn, sizeof(id->mn)); memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr)); @@ -919,6 +972,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) kfree(id); return 0; } +EXPORT_SYMBOL_GPL(nvme_init_identify); static int nvme_dev_open(struct inode *inode, struct file *file) { @@ -965,13 +1019,13 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { - dev_warn(ctrl->dev, + dev_warn(ctrl->device, "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); ret = -EINVAL; goto out_unlock; } - dev_warn(ctrl->dev, + dev_warn(ctrl->device, "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); kref_get(&ns->kref); mutex_unlock(&ctrl->namespaces_mutex); @@ -997,7 +1051,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: - dev_warn(ctrl->dev, "resetting controller\n"); + dev_warn(ctrl->device, "resetting controller\n"); return ctrl->ops->reset_ctrl(ctrl); case NVME_IOCTL_SUBSYS_RESET: return nvme_reset_subsystem(ctrl); @@ -1028,6 +1082,30 @@ static ssize_t nvme_sysfs_reset(struct device *dev, } static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); +static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ctrl *ctrl = ns->ctrl; + int serial_len = sizeof(ctrl->serial); + int model_len = sizeof(ctrl->model); + + if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) + return sprintf(buf, "eui.%16phN\n", ns->uuid); + + if (memchr_inv(ns->eui, 0, sizeof(ns->eui))) + return sprintf(buf, "eui.%8phN\n", ns->eui); + + while (ctrl->serial[serial_len - 1] == ' ') + serial_len--; + while (ctrl->model[model_len - 1] == ' ') + model_len--; + + return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid, + serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id); +} +static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); + static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1053,6 +1131,7 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL); static struct attribute *nvme_ns_attrs[] = { + &dev_attr_wwid.attr, &dev_attr_uuid.attr, &dev_attr_eui.attr, &dev_attr_nsid.attr, @@ -1081,7 +1160,7 @@ static const struct attribute_group nvme_ns_attr_group = { .is_visible = nvme_attrs_are_visible, }; -#define nvme_show_function(field) \ +#define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ @@ -1090,15 +1169,26 @@ static ssize_t field##_show(struct device *dev, \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); -nvme_show_function(model); -nvme_show_function(serial); -nvme_show_function(firmware_rev); +#define nvme_show_int_function(field) \ +static ssize_t field##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ + return sprintf(buf, "%d\n", ctrl->field); \ +} \ +static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); + +nvme_show_str_function(model); +nvme_show_str_function(serial); +nvme_show_str_function(firmware_rev); +nvme_show_int_function(cntlid); static struct attribute *nvme_dev_attrs[] = { &dev_attr_reset_controller.attr, &dev_attr_model.attr, &dev_attr_serial.attr, &dev_attr_firmware_rev.attr, + &dev_attr_cntlid.attr, NULL }; @@ -1308,6 +1398,7 @@ void nvme_scan_namespaces(struct nvme_ctrl *ctrl) mutex_unlock(&ctrl->namespaces_mutex); kfree(id); } +EXPORT_SYMBOL_GPL(nvme_scan_namespaces); void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { @@ -1316,6 +1407,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) nvme_ns_remove(ns); } +EXPORT_SYMBOL_GPL(nvme_remove_namespaces); static DEFINE_IDA(nvme_instance_ida); @@ -1347,13 +1439,14 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl) } void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) - { +{ device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); spin_lock(&dev_list_lock); list_del(&ctrl->node); spin_unlock(&dev_list_lock); } +EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); static void nvme_free_ctrl(struct kref *kref) { @@ -1370,6 +1463,7 @@ void nvme_put_ctrl(struct nvme_ctrl *ctrl) { kref_put(&ctrl->kref, nvme_free_ctrl); } +EXPORT_SYMBOL_GPL(nvme_put_ctrl); /* * Initialize a NVMe controller structures. This needs to be called during @@ -1394,14 +1488,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, MKDEV(nvme_char_major, ctrl->instance), - dev, nvme_dev_attr_groups, + ctrl, nvme_dev_attr_groups, "nvme%d", ctrl->instance); if (IS_ERR(ctrl->device)) { ret = PTR_ERR(ctrl->device); goto out_release_instance; } get_device(ctrl->device); - dev_set_drvdata(ctrl->device, ctrl); ida_init(&ctrl->ns_ida); spin_lock(&dev_list_lock); @@ -1414,6 +1507,7 @@ out_release_instance: out: return ret; } +EXPORT_SYMBOL_GPL(nvme_init_ctrl); /** * nvme_kill_queues(): Ends all namespace queues @@ -1446,6 +1540,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) } mutex_unlock(&ctrl->namespaces_mutex); } +EXPORT_SYMBOL_GPL(nvme_kill_queues); void nvme_stop_queues(struct nvme_ctrl *ctrl) { @@ -1462,6 +1557,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) } mutex_unlock(&ctrl->namespaces_mutex); } +EXPORT_SYMBOL_GPL(nvme_stop_queues); void nvme_start_queues(struct nvme_ctrl *ctrl) { @@ -1475,6 +1571,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } mutex_unlock(&ctrl->namespaces_mutex); } +EXPORT_SYMBOL_GPL(nvme_start_queues); int __init nvme_core_init(void) { @@ -1514,3 +1611,8 @@ void nvme_core_exit(void) class_destroy(nvme_class); __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); } + +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0"); +module_init(nvme_core_init); +module_exit(nvme_core_exit); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 6bb15e4926dc..42a01a931989 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -379,8 +379,31 @@ out: return ret; } +static void nvme_nvm_bb_tbl_fold(struct nvm_dev *nvmdev, + int nr_dst_blks, u8 *dst_blks, + int nr_src_blks, u8 *src_blks) +{ + int blk, offset, pl, blktype; + + for (blk = 0; blk < nr_dst_blks; blk++) { + offset = blk * nvmdev->plane_mode; + blktype = src_blks[offset]; + + /* Bad blocks on any planes take precedence over other types */ + for (pl = 0; pl < nvmdev->plane_mode; pl++) { + if (src_blks[offset + pl] & + (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { + blktype = src_blks[offset + pl]; + break; + } + } + + dst_blks[blk] = blktype; + } +} + static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, - int nr_blocks, nvm_bb_update_fn *update_bbtbl, + int nr_dst_blks, nvm_bb_update_fn *update_bbtbl, void *priv) { struct request_queue *q = nvmdev->q; @@ -388,7 +411,9 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks; + u8 *dst_blks = NULL; + int nr_src_blks = nr_dst_blks * nvmdev->plane_mode; + int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_src_blks; int ret = 0; c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; @@ -399,6 +424,12 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, if (!bb_tbl) return -ENOMEM; + dst_blks = kzalloc(nr_dst_blks, GFP_KERNEL); + if (!dst_blks) { + ret = -ENOMEM; + goto out; + } + ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c, bb_tbl, tblsz); if (ret) { @@ -420,16 +451,21 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, goto out; } - if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) { + if (le32_to_cpu(bb_tbl->tblks) != nr_src_blks) { ret = -EINVAL; dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)", - le32_to_cpu(bb_tbl->tblks), nr_blocks); + le32_to_cpu(bb_tbl->tblks), nr_src_blks); goto out; } + nvme_nvm_bb_tbl_fold(nvmdev, nr_dst_blks, dst_blks, + nr_src_blks, bb_tbl->blk); + ppa = dev_to_generic_addr(nvmdev, ppa); - ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv); + ret = update_bbtbl(ppa, nr_dst_blks, dst_blks, priv); + out: + kfree(dst_blks); kfree(bb_tbl); return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index fb15ba5f5d19..f846da4eb338 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -59,6 +59,12 @@ enum nvme_quirks { * correctly. */ NVME_QUIRK_IDENTIFY_CNS = (1 << 1), + + /* + * The controller deterministically returns O's on reads to discarded + * logical blocks. + */ + NVME_QUIRK_DISCARD_ZEROES = (1 << 2), }; struct nvme_ctrl { @@ -78,6 +84,7 @@ struct nvme_ctrl { char serial[20]; char model[40]; char firmware_rev[8]; + int cntlid; u32 ctrl_config; @@ -85,6 +92,7 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u16 vid; atomic_t abort_limit; u8 event_limit; u8 vwc; @@ -124,6 +132,7 @@ struct nvme_ns { }; struct nvme_ctrl_ops { + struct module *module; int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); @@ -255,7 +264,8 @@ void nvme_requeue_req(struct request *req); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buffer, unsigned bufflen, u32 *result, unsigned timeout); + struct nvme_completion *cqe, void *buffer, unsigned bufflen, + unsigned timeout); int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, u32 *result, unsigned timeout); @@ -273,8 +283,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, dma_addr_t dma_addr, u32 *result); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); -extern spinlock_t dev_list_lock; - struct sg_io_hdr; int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 680f5780750c..f8db70ae172d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -27,7 +27,6 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/kdev_t.h> -#include <linux/kthread.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/module.h> @@ -39,6 +38,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/t10-pi.h> +#include <linux/timer.h> #include <linux/types.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <asm/unaligned.h> @@ -57,18 +57,6 @@ #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) -unsigned char admin_timeout = 60; -module_param(admin_timeout, byte, 0644); -MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); - -unsigned char nvme_io_timeout = 30; -module_param_named(io_timeout, nvme_io_timeout, byte, 0644); -MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); - -unsigned char shutdown_timeout = 5; -module_param(shutdown_timeout, byte, 0644); -MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); - static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -76,10 +64,7 @@ static bool use_cmb_sqes = true; module_param(use_cmb_sqes, bool, 0644); MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); -static LIST_HEAD(dev_list); -static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; -static wait_queue_head_t nvme_kthread_wait; struct nvme_dev; struct nvme_queue; @@ -92,7 +77,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); * Represents an NVM Express device. Each nvme_dev is a PCI function. */ struct nvme_dev { - struct list_head node; struct nvme_queue **queues; struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; @@ -110,6 +94,8 @@ struct nvme_dev { struct work_struct reset_work; struct work_struct scan_work; struct work_struct remove_work; + struct work_struct async_work; + struct timer_list watchdog_timer; struct mutex shutdown_lock; bool subsystem; void __iomem *cmb; @@ -148,7 +134,6 @@ struct nvme_queue { u32 __iomem *q_db; u16 q_depth; s16 cq_vector; - u16 sq_head; u16 sq_tail; u16 cq_head; u16 qid; @@ -303,17 +288,20 @@ static void nvme_complete_async_event(struct nvme_dev *dev, u16 status = le16_to_cpu(cqe->status) >> 1; u32 result = le32_to_cpu(cqe->result); - if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) + if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) { ++dev->ctrl.event_limit; + queue_work(nvme_workq, &dev->async_work); + } + if (status != NVME_SC_SUCCESS) return; switch (result & 0xff07) { case NVME_AER_NOTICE_NS_CHANGED: - dev_info(dev->dev, "rescanning\n"); + dev_info(dev->ctrl.device, "rescanning\n"); nvme_queue_scan(dev); default: - dev_warn(dev->dev, "async event result %08x\n", result); + dev_warn(dev->ctrl.device, "async event result %08x\n", result); } } @@ -727,7 +715,7 @@ static void nvme_complete_rq(struct request *req) } if (unlikely(iod->aborted)) { - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "completing aborted command with status: %04x\n", req->errors); } @@ -749,7 +737,6 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) if ((status & 1) != phase) break; - nvmeq->sq_head = le16_to_cpu(cqe.sq_head); if (++head == nvmeq->q_depth) { head = 0; phase = !phase; @@ -759,7 +746,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) *tag = -1; if (unlikely(cqe.command_id >= nvmeq->q_depth)) { - dev_warn(nvmeq->q_dmadev, + dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", cqe.command_id, le16_to_cpu(cqe.sq_id)); continue; @@ -778,10 +765,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - u32 result = le32_to_cpu(cqe.result); - req->special = (void *)(uintptr_t)result; - } + if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) + memcpy(req->special, &cqe, sizeof(cqe)); blk_mq_complete_request(req, status >> 1); } @@ -846,15 +831,22 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } -static void nvme_submit_async_event(struct nvme_dev *dev) +static void nvme_async_event_work(struct work_struct *work) { + struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work); + struct nvme_queue *nvmeq = dev->queues[0]; struct nvme_command c; memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; - c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit; - __nvme_submit_cmd(dev->queues[0], &c); + spin_lock_irq(&nvmeq->q_lock); + while (dev->ctrl.event_limit > 0) { + c.common.command_id = NVME_AQ_BLKMQ_DEPTH + + --dev->ctrl.event_limit; + __nvme_submit_cmd(nvmeq, &c); + } + spin_unlock_irq(&nvmeq->q_lock); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -924,12 +916,10 @@ static void abort_endio(struct request *req, int error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; - u32 result = (u32)(uintptr_t)req->special; u16 status = req->errors; - dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); + dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); - blk_mq_free_request(req); } @@ -948,7 +938,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * shutdown, so we return BLK_EH_HANDLED. */ if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) { - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); @@ -962,7 +952,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * returned to the driver, or if this is the admin queue. */ if (!nvmeq->qid || iod->aborted) { - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); @@ -988,8 +978,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) cmd.abort.cid = req->tag; cmd.abort.sqid = cpu_to_le16(nvmeq->qid); - dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n", - req->tag, nvmeq->qid); + dev_warn(nvmeq->dev->ctrl.device, + "I/O %d QID %d timeout, aborting\n", + req->tag, nvmeq->qid); abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, BLK_MQ_REQ_NOWAIT); @@ -1018,7 +1009,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved if (!blk_mq_request_started(req)) return; - dev_dbg_ratelimited(nvmeq->q_dmadev, + dev_dbg_ratelimited(nvmeq->dev->ctrl.device, "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid); status = NVME_SC_ABORT_REQ; @@ -1173,9 +1164,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->qid = qid; nvmeq->cq_vector = -1; dev->queues[qid] = nvmeq; - - /* make sure queue descriptor is set before queue count, for kthread */ - mb(); dev->queue_count++; return nvmeq; @@ -1360,53 +1348,31 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -static int nvme_kthread(void *data) -{ - struct nvme_dev *dev, *next; - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - spin_lock(&dev_list_lock); - list_for_each_entry_safe(dev, next, &dev_list, node) { - int i; - u32 csts = readl(dev->bar + NVME_REG_CSTS); - - /* - * Skip controllers currently under reset. - */ - if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work)) - continue; - - if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) || - csts & NVME_CSTS_CFS) { - if (queue_work(nvme_workq, &dev->reset_work)) { - dev_warn(dev->dev, - "Failed status: %x, reset controller\n", - readl(dev->bar + NVME_REG_CSTS)); - } - continue; - } - for (i = 0; i < dev->queue_count; i++) { - struct nvme_queue *nvmeq = dev->queues[i]; - if (!nvmeq) - continue; - spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - - while (i == 0 && dev->ctrl.event_limit > 0) - nvme_submit_async_event(dev); - spin_unlock_irq(&nvmeq->q_lock); - } +static void nvme_watchdog_timer(unsigned long data) +{ + struct nvme_dev *dev = (struct nvme_dev *)data; + u32 csts = readl(dev->bar + NVME_REG_CSTS); + + /* + * Skip controllers currently under reset. + */ + if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) && + ((csts & NVME_CSTS_CFS) || + (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) { + if (queue_work(nvme_workq, &dev->reset_work)) { + dev_warn(dev->dev, + "Failed status: 0x%x, reset controller.\n", + csts); } - spin_unlock(&dev_list_lock); - schedule_timeout(round_jiffies_relative(HZ)); + return; } - return 0; + + mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ)); } static int nvme_create_io_queues(struct nvme_dev *dev) { - unsigned i; + unsigned i, max; int ret = 0; for (i = dev->queue_count; i <= dev->max_qid; i++) { @@ -1416,7 +1382,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } } - for (i = dev->online_queues; i <= dev->queue_count - 1; i++) { + max = min(dev->max_qid, dev->queue_count - 1); + for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); if (ret) { nvme_free_queues(dev, i); @@ -1507,7 +1474,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * access to the admin queue, as that might be only way to fix them up. */ if (result > 0) { - dev_err(dev->dev, "Could not set queue count (%d)\n", result); + dev_err(dev->ctrl.device, + "Could not set queue count (%d)\n", result); nr_io_queues = 0; result = 0; } @@ -1573,9 +1541,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) adminq->cq_vector = -1; goto free_queues; } - - /* Free previously allocated queues that are no longer usable */ - nvme_free_queues(dev, nr_io_queues + 1); return nvme_create_io_queues(dev); free_queues: @@ -1709,7 +1674,13 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + } else { + blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); + + /* Free previously allocated queues that are no longer usable */ + nvme_free_queues(dev, dev->online_queues); } + nvme_queue_scan(dev); return 0; } @@ -1796,56 +1767,12 @@ static void nvme_pci_disable(struct nvme_dev *dev) } } -static int nvme_dev_list_add(struct nvme_dev *dev) -{ - bool start_thread = false; - - spin_lock(&dev_list_lock); - if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) { - start_thread = true; - nvme_thread = NULL; - } - list_add(&dev->node, &dev_list); - spin_unlock(&dev_list_lock); - - if (start_thread) { - nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); - wake_up_all(&nvme_kthread_wait); - } else - wait_event_killable(nvme_kthread_wait, nvme_thread); - - if (IS_ERR_OR_NULL(nvme_thread)) - return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR; - - return 0; -} - -/* -* Remove the node from the device list and check -* for whether or not we need to stop the nvme_thread. -*/ -static void nvme_dev_list_remove(struct nvme_dev *dev) -{ - struct task_struct *tmp = NULL; - - spin_lock(&dev_list_lock); - list_del_init(&dev->node); - if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) { - tmp = nvme_thread; - nvme_thread = NULL; - } - spin_unlock(&dev_list_lock); - - if (tmp) - kthread_stop(tmp); -} - static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { int i; u32 csts = -1; - nvme_dev_list_remove(dev); + del_timer_sync(&dev->watchdog_timer); mutex_lock(&dev->shutdown_lock); if (pci_is_enabled(to_pci_dev(dev->dev))) { @@ -1907,7 +1834,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) { - dev_warn(dev->dev, "Removing after probe failure status: %d\n", status); + dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); kref_get(&dev->ctrl.kref); nvme_dev_disable(dev, false); @@ -1954,17 +1881,16 @@ static void nvme_reset_work(struct work_struct *work) goto out; dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; + queue_work(nvme_workq, &dev->async_work); - result = nvme_dev_list_add(dev); - if (result) - goto out; + mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ)); /* * Keep the controller around but remove all namespaces if we don't have * any working I/O queue. */ if (dev->online_queues < 2) { - dev_warn(dev->dev, "IO queues not created\n"); + dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); @@ -2032,6 +1958,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { + .module = THIS_MODULE, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, @@ -2089,10 +2016,12 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto free; - INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->reset_work, nvme_reset_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); + INIT_WORK(&dev->async_work, nvme_async_event_work); + setup_timer(&dev->watchdog_timer, nvme_watchdog_timer, + (unsigned long)dev); mutex_init(&dev->shutdown_lock); init_completion(&dev->ioq_wait); @@ -2105,6 +2034,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release_pools; + dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); + queue_work(nvme_workq, &dev->reset_work); return 0; @@ -2145,8 +2076,11 @@ static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); + del_timer_sync(&dev->watchdog_timer); + set_bit(NVME_CTRL_REMOVING, &dev->flags); pci_set_drvdata(pdev, NULL); + flush_work(&dev->async_work); flush_work(&dev->scan_work); nvme_remove_namespaces(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl); @@ -2192,7 +2126,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, * shutdown the controller to quiesce. The controller will be restarted * after the slot reset through driver's slot_reset callback. */ - dev_warn(&pdev->dev, "error detected: state:%d\n", state); + dev_warn(dev->ctrl.device, "error detected: state:%d\n", state); switch (state) { case pci_channel_io_normal: return PCI_ERS_RESULT_CAN_RECOVER; @@ -2209,7 +2143,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - dev_info(&pdev->dev, "restart after slot reset\n"); + dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); queue_work(nvme_workq, &dev->reset_work); return PCI_ERS_RESULT_RECOVERED; @@ -2232,7 +2166,8 @@ static const struct pci_error_handlers nvme_err_handler = { static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0953), - .driver_data = NVME_QUIRK_STRIPE_SIZE, }, + .driver_data = NVME_QUIRK_STRIPE_SIZE | + NVME_QUIRK_DISCARD_ZEROES, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, @@ -2257,34 +2192,20 @@ static int __init nvme_init(void) { int result; - init_waitqueue_head(&nvme_kthread_wait); - nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); if (!nvme_workq) return -ENOMEM; - result = nvme_core_init(); - if (result < 0) - goto kill_workq; - result = pci_register_driver(&nvme_driver); if (result) - goto core_exit; - return 0; - - core_exit: - nvme_core_exit(); - kill_workq: - destroy_workqueue(nvme_workq); + destroy_workqueue(nvme_workq); return result; } static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); - nvme_core_exit(); destroy_workqueue(nvme_workq); - BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); _nvme_check_size(); } diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2190419bdf0a..c3c43184a787 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -92,9 +92,9 @@ enum { NVM_ADDRMODE_CHANNEL = 1, /* Plane programming mode for LUN */ - NVM_PLANE_SINGLE = 0, - NVM_PLANE_DOUBLE = 1, - NVM_PLANE_QUAD = 2, + NVM_PLANE_SINGLE = 1, + NVM_PLANE_DOUBLE = 2, + NVM_PLANE_QUAD = 4, /* Status codes */ NVM_RSP_SUCCESS = 0x0, @@ -341,8 +341,8 @@ struct nvm_dev { int lps_per_blk; int *lptbl; - unsigned long total_pages; unsigned long total_blocks; + unsigned long total_secs; int nr_luns; unsigned max_pages_per_blk; |