diff options
author | Mike Miller <mike.miller@hp.com> | 2010-06-02 21:58:06 +0200 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-08-07 18:12:51 +0200 |
commit | 5e216153c34ac21781110795284a784037f808e3 (patch) | |
tree | b42ceb1e884bdb397283ddb01562fe03d1f95cad /drivers/block/cciss.c | |
parent | cciss: make interrupt access methods return type bool (diff) | |
download | linux-5e216153c34ac21781110795284a784037f808e3.tar.xz linux-5e216153c34ac21781110795284a784037f808e3.zip |
cciss: add performant mode support for Stars/Sirius
Add a mode of controller operation called Performant Mode. Even though
cciss has been deprecated in favor of hpsa there are new controllers due
out next year that HP must support in older vendor distros. Vendors
require all fixes/features be upstream. These new controllers support
only 16 commands in simple mode but support up to 1024 in performant mode.
This requires us to add this support at this late date.
The performant mode transport minimizes host PCI accesses by performinf
many completions per read. PCI writes are posted so the host can write
then immediately get off the bus not waiting for the writwe to complete to
the target. In the context of performant mode the host read out to a
controller pulls all posted writes into host memory ensuring the reply
queue is coherent.
Signed-off-by: Mike Miller <mike.miller@hp.com>
Cc: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers/block/cciss.c')
-rw-r--r-- | drivers/block/cciss.c | 267 |
1 files changed, 216 insertions, 51 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index cd830cb64a5d..08a2e619dc36 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -206,6 +206,11 @@ static void cciss_device_release(struct device *dev); static void cciss_free_gendisk(ctlr_info_t *h, int drv_index); static void cciss_free_drive_info(ctlr_info_t *h, int drv_index); +/* performant mode helper functions */ +static void calc_bucket_map(int *bucket, int num_buckets, int nsgs, + int *bucket_map); +static void cciss_put_controller_into_performant_mode(ctlr_info_t *h); + #ifdef CONFIG_PROC_FS static void cciss_procinit(int i); #else @@ -231,6 +236,16 @@ static const struct block_device_operations cciss_fops = { .revalidate_disk = cciss_revalidate, }; +/* set_performant_mode: Modify the tag for cciss performant + * set bit 0 for pull model, bits 3-1 for block fetch + * register number + */ +static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c) +{ + if (likely(h->transMethod == CFGTBL_Trans_Performant)) + c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1); +} + /* * Enqueuing and dequeuing functions for cmdlists. */ @@ -261,6 +276,7 @@ static void enqueue_cmd_and_start_io(ctlr_info_t *h, CommandList_struct *c) { unsigned long flags; + set_performant_mode(h, c); spin_lock_irqsave(&h->lock, flags); addQ(&h->reqQ, c); h->Qdepth++; @@ -350,6 +366,28 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", #ifdef CONFIG_PROC_FS +static inline u32 next_command(ctlr_info_t *h) +{ + u32 a; + + if (unlikely(h->transMethod != CFGTBL_Trans_Performant)) + return h->access.command_completed(h); + + if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) { + a = *(h->reply_pool_head); /* Next cmd in ring buffer */ + (h->reply_pool_head)++; + h->commands_outstanding--; + } else { + a = FIFO_EMPTY; + } + /* Check for wraparound */ + if (h->reply_pool_head == (h->reply_pool + h->max_commands)) { + h->reply_pool_head = h->reply_pool; + h->reply_pool_wraparound ^= 1; + } + return a; +} + /* * Report information about this controller. */ @@ -377,7 +415,7 @@ static void cciss_seq_show_header(struct seq_file *seq) h->product_name, (unsigned long)h->board_id, h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], - h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT], + h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT], h->num_luns, h->Qdepth, h->commands_outstanding, h->maxQsinceinit, h->max_outstanding, h->maxSG); @@ -3126,13 +3164,13 @@ after_error_processing: static inline u32 cciss_tag_contains_index(u32 tag) { -#define DIRECT_LOOKUP_BIT 0x04 +#define DIRECT_LOOKUP_BIT 0x10 return tag & DIRECT_LOOKUP_BIT; } static inline u32 cciss_tag_to_index(u32 tag) { -#define DIRECT_LOOKUP_SHIFT 3 +#define DIRECT_LOOKUP_SHIFT 5 return tag >> DIRECT_LOOKUP_SHIFT; } @@ -3262,9 +3300,12 @@ static void do_cciss_request(struct request_queue *q) blk_rq_sectors(creq), seg, chained); #endif /* CCISS_DEBUG */ - c->Header.SGList = c->Header.SGTotal = seg + chained; - if (seg > h->max_cmd_sgentries) + c->Header.SGTotal = seg + chained; + if (seg <= h->max_cmd_sgentries) + c->Header.SGList = c->Header.SGTotal; + else c->Header.SGList = h->max_cmd_sgentries; + set_performant_mode(h, c); if (likely(blk_fs_request(creq))) { if(h->cciss_read == CCISS_READ_10) { @@ -3370,10 +3411,10 @@ static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag) tag_index = cciss_tag_to_index(raw_tag); if (bad_tag(h, tag_index, raw_tag)) - return get_next_completion(h); + return next_command(h); c = h->cmd_pool + tag_index; finish_cmd(h, c, raw_tag); - return get_next_completion(h); + return next_command(h); } /* process completion of a non-indexed command */ @@ -3390,11 +3431,11 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) tag_masked = cciss_tag_discard_error_bits(tag); if (busaddr_masked == tag_masked) { finish_cmd(h, c, raw_tag); - return get_next_completion(h); + return next_command(h); } } bad_tag(h, h->nr_cmds + 1, raw_tag); - return get_next_completion(h); + return next_command(h); } static irqreturn_t do_cciss_intx(int irq, void *dev_id) @@ -3700,6 +3741,155 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr) return -1; } +/* Fill in bucket_map[], given nsgs (the max number of + * scatter gather elements supported) and bucket[], + * which is an array of 8 integers. The bucket[] array + * contains 8 different DMA transfer sizes (in 16 + * byte increments) which the controller uses to fetch + * commands. This function fills in bucket_map[], which + * maps a given number of scatter gather elements to one of + * the 8 DMA transfer sizes. The point of it is to allow the + * controller to only do as much DMA as needed to fetch the + * command, with the DMA transfer size encoded in the lower + * bits of the command address. + */ +static void calc_bucket_map(int bucket[], int num_buckets, + int nsgs, int *bucket_map) +{ + int i, j, b, size; + + /* even a command with 0 SGs requires 4 blocks */ +#define MINIMUM_TRANSFER_BLOCKS 4 +#define NUM_BUCKETS 8 + /* Note, bucket_map must have nsgs+1 entries. */ + for (i = 0; i <= nsgs; i++) { + /* Compute size of a command with i SG entries */ + size = i + MINIMUM_TRANSFER_BLOCKS; + b = num_buckets; /* Assume the biggest bucket */ + /* Find the bucket that is just big enough */ + for (j = 0; j < 8; j++) { + if (bucket[j] >= size) { + b = j; + break; + } + } + /* for a command with i SG entries, use bucket b. */ + bucket_map[i] = b; + } +} + +static void +cciss_put_controller_into_performant_mode(ctlr_info_t *h) +{ + int l = 0; + __u32 trans_support; + __u32 trans_offset; + /* + * 5 = 1 s/g entry or 4k + * 6 = 2 s/g entry or 8k + * 8 = 4 s/g entry or 16k + * 10 = 6 s/g entry or 24k + */ + int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4}; + unsigned long register_value; + + BUILD_BUG_ON(28 > MAXSGENTRIES + 4); + + /* Attempt to put controller into performant mode if supported */ + /* Does board support performant mode? */ + trans_support = readl(&(h->cfgtable->TransportSupport)); + if (!(trans_support & PERFORMANT_MODE)) + return; + + printk(KERN_WARNING "cciss%d: Placing controller into " + "performant mode\n", h->ctlr); + /* Performant mode demands commands on a 32 byte boundary + * pci_alloc_consistent aligns on page boundarys already. + * Just need to check if divisible by 32 + */ + if ((sizeof(CommandList_struct) % 32) != 0) { + printk(KERN_WARNING "%s %d %s\n", + "cciss info: command size[", + (int)sizeof(CommandList_struct), + "] not divisible by 32, no performant mode..\n"); + return; + } + + /* Performant mode ring buffer and supporting data structures */ + h->reply_pool = (__u64 *)pci_alloc_consistent( + h->pdev, h->max_commands * sizeof(__u64), + &(h->reply_pool_dhandle)); + + /* Need a block fetch table for performant mode */ + h->blockFetchTable = kmalloc(((h->maxsgentries+1) * + sizeof(__u32)), GFP_KERNEL); + + if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL)) + goto clean_up; + + h->reply_pool_wraparound = 1; /* spec: init to 1 */ + + /* Controller spec: zero out this buffer. */ + memset(h->reply_pool, 0, h->max_commands * sizeof(__u64)); + h->reply_pool_head = h->reply_pool; + + trans_offset = readl(&(h->cfgtable->TransMethodOffset)); + calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries, + h->blockFetchTable); + writel(bft[0], &h->transtable->BlockFetch0); + writel(bft[1], &h->transtable->BlockFetch1); + writel(bft[2], &h->transtable->BlockFetch2); + writel(bft[3], &h->transtable->BlockFetch3); + writel(bft[4], &h->transtable->BlockFetch4); + writel(bft[5], &h->transtable->BlockFetch5); + writel(bft[6], &h->transtable->BlockFetch6); + writel(bft[7], &h->transtable->BlockFetch7); + + /* size of controller ring buffer */ + writel(h->max_commands, &h->transtable->RepQSize); + writel(1, &h->transtable->RepQCount); + writel(0, &h->transtable->RepQCtrAddrLow32); + writel(0, &h->transtable->RepQCtrAddrHigh32); + writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32); + writel(0, &h->transtable->RepQAddr0High32); + writel(CFGTBL_Trans_Performant, + &(h->cfgtable->HostWrite.TransportRequest)); + + h->transMethod = CFGTBL_Trans_Performant; + writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); + /* under certain very rare conditions, this can take awhile. + * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right + * as we enter this code.) */ + for (l = 0; l < MAX_CONFIG_WAIT; l++) { + register_value = readl(h->vaddr + SA5_DOORBELL); + if (!(register_value & CFGTBL_ChangeReq)) + break; + /* delay and try again */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(10); + } + register_value = readl(&(h->cfgtable->TransportActive)); + if (!(register_value & CFGTBL_Trans_Performant)) { + printk(KERN_WARNING "cciss: unable to get board into" + " performant mode\n"); + return; + } + + /* Change the access methods to the performant access methods */ + h->access = SA5_performant_access; + + return; +clean_up: + kfree(h->blockFetchTable); + if (h->reply_pool) + pci_free_consistent(h->pdev, + h->max_commands * sizeof(__u64), + h->reply_pool, + h->reply_pool_dhandle); + return; + +} /* cciss_put_controller_into_performant_mode */ + /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use IO-APIC mode. */ @@ -3749,7 +3939,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c, default_int_mode: #endif /* CONFIG_PCI_MSI */ /* if we get here we're going to use the default interrupt mode */ - c->intr[SIMPLE_MODE_INT] = pdev->irq; + c->intr[PERF_MODE_INT] = pdev->irq; return; } @@ -3761,6 +3951,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) __u32 cfg_base_addr; __u64 cfg_base_addr_index; int i, prod_index, err; + __u32 trans_offset; subsystem_vendor_id = pdev->subsystem_vendor; subsystem_device_id = pdev->subsystem_device; @@ -3874,11 +4065,16 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) c->cfgtable = remap_pci_mem(pci_resource_start(pdev, cfg_base_addr_index) + cfg_offset, sizeof(CfgTable_struct)); + /* Find performant mode table. */ + trans_offset = readl(&(c->cfgtable->TransMethodOffset)); + c->transtable = remap_pci_mem(pci_resource_start(pdev, + cfg_base_addr_index) + cfg_offset+trans_offset, + sizeof(*c->transtable)); c->board_id = board_id; -#ifdef CCISS_DEBUG - print_cfg_table(c->cfgtable); -#endif /* CCISS_DEBUG */ + #ifdef CCISS_DEBUG + print_cfg_table(c->cfgtable); + #endif /* CCISS_DEBUG */ /* Some controllers support Zero Memory Raid (ZMR). * When configured in ZMR mode the number of supported @@ -3888,7 +4084,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) * are supported on the controller then subtract 4 to * leave a little room for ioctl calls. */ - c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); + c->max_commands = readl(&(c->cfgtable->MaxPerformantModeCommands)); c->maxsgentries = readl(&(c->cfgtable->MaxSGElements)); /* @@ -3933,7 +4129,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) * kernels revealed a bug in the refetch if dom0 resides on a P600. */ if(board_id == 0x3225103C) { - __u32 dma_prefetch; + __u32 dma_prefetch; __u32 dma_refetch; dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG); dma_prefetch |= 0x8000; @@ -3944,38 +4140,8 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) } #ifdef CCISS_DEBUG - printk("Trying to put board into Simple mode\n"); + printk(KERN_WARNING "Trying to put board into Performant mode\n"); #endif /* CCISS_DEBUG */ - c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); - /* Update the field, and then ring the doorbell */ - writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest)); - writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL); - - /* under certain very rare conditions, this can take awhile. - * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right - * as we enter this code.) */ - for (i = 0; i < MAX_CONFIG_WAIT; i++) { - if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq)) - break; - /* delay and try again */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(1)); - } - -#ifdef CCISS_DEBUG - printk(KERN_DEBUG "I counter got to %d %x\n", i, - readl(c->vaddr + SA5_DOORBELL)); -#endif /* CCISS_DEBUG */ -#ifdef CCISS_DEBUG - print_cfg_table(c->cfgtable); -#endif /* CCISS_DEBUG */ - - if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) { - printk(KERN_WARNING "cciss: unable to get board into" - " simple mode\n"); - err = -ENODEV; - goto err_out_free_res; - } return 0; err_out_free_res: @@ -3984,6 +4150,7 @@ err_out_free_res: * Smart Array controllers that pci_enable_device does not undo */ pci_release_regions(pdev); + cciss_put_controller_into_performant_mode(c); return err; } @@ -4260,7 +4427,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, i = alloc_cciss_hba(); if (i < 0) return -1; - hba[i]->busy_initializing = 1; INIT_HLIST_HEAD(&hba[i]->cmpQ); INIT_HLIST_HEAD(&hba[i]->reqQ); @@ -4327,7 +4493,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n", hba[i]->devname, pdev->device, pci_name(pdev), - hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); + hba[i]->intr[PERF_MODE_INT], dac ? "" : " not"); hba[i]->cmd_pool_bits = kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) @@ -4433,7 +4599,7 @@ clean4: hba[i]->nr_cmds * sizeof(ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); - free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]); + free_irq(hba[i]->intr[PERF_MODE_INT], hba[i]); clean2: unregister_blkdev(hba[i]->major, hba[i]->devname); clean1: @@ -4475,7 +4641,7 @@ static void cciss_shutdown(struct pci_dev *pdev) printk(KERN_WARNING "cciss%d: Error flushing cache\n", h->ctlr); h->access.set_intr_mask(h, CCISS_INTR_OFF); - free_irq(h->intr[2], h); + free_irq(h->intr[PERF_MODE_INT], h); } static void __devexit cciss_remove_one(struct pci_dev *pdev) @@ -4575,7 +4741,6 @@ static int __init cciss_init(void) * array of them, the size must be a multiple of 8 bytes. */ BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT); - printk(KERN_INFO DRIVER_NAME "\n"); err = bus_register(&cciss_bus_type); |