diff options
Diffstat (limited to 'drivers/scsi/storvsc_drv.c')
-rw-r--r-- | drivers/scsi/storvsc_drv.c | 162 |
1 files changed, 141 insertions, 21 deletions
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 05526b71541b..638e5f427c90 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -136,6 +136,8 @@ struct hv_fc_wwn_packet { #define SRB_FLAGS_PORT_DRIVER_RESERVED 0x0F000000 #define SRB_FLAGS_CLASS_DRIVER_RESERVED 0xF0000000 +#define SP_UNTAGGED ((unsigned char) ~0) +#define SRB_SIMPLE_TAG_REQUEST 0x20 /* * Platform neutral description of a scsi request - @@ -278,7 +280,7 @@ static const struct vmstor_protocol vmstor_protocols[] = { /* - * This structure is sent during the intialization phase to get the different + * This structure is sent during the initialization phase to get the different * properties of the channel. */ @@ -375,6 +377,7 @@ enum storvsc_request_type { #define SRB_STATUS_SUCCESS 0x01 #define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_DATA_OVERRUN 0x12 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -458,6 +461,15 @@ struct storvsc_device { * Max I/O, the device can support. */ u32 max_transfer_bytes; + /* + * Number of sub-channels we will open. + */ + u16 num_sc; + struct vmbus_channel **stor_chns; + /* + * Mask of CPUs bound to subchannels. 
+ */ + struct cpumask alloced_cpus; /* Used for vsc/vsp channel reset process */ struct storvsc_cmd_request init_request; struct storvsc_cmd_request reset_request; @@ -635,6 +647,11 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); + + if (new_sc->state == CHANNEL_OPENED_STATE) { + stor_device->stor_chns[new_sc->target_cpu] = new_sc; + cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus); + } } static void handle_multichannel_storage(struct hv_device *device, int max_chns) @@ -651,6 +668,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) if (!stor_device) return; + stor_device->num_sc = num_sc; request = &stor_device->init_request; vstor_packet = &request->vstor_packet; @@ -838,6 +856,25 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc) * support multi-channel. */ max_chns = vstor_packet->storage_channel_properties.max_channel_cnt; + + /* + * Allocate state to manage the sub-channels. + * We allocate an array based on the number of possible CPUs + * (Hyper-V does not support cpu online/offline). + * This array will be sparsely populated with unique + * channels - primary + sub-channels. + * We will however populate all the slots to evenly distribute + * the load. 
+ */ + stor_device->stor_chns = kzalloc(sizeof(void *) * num_possible_cpus(), + GFP_KERNEL); + if (stor_device->stor_chns == NULL) + return -ENOMEM; + + stor_device->stor_chns[device->channel->target_cpu] = device->channel; + cpumask_set_cpu(device->channel->target_cpu, + &stor_device->alloced_cpus); + if (vmstor_proto_version >= VMSTOR_PROTO_VERSION_WIN8) { if (vstor_packet->storage_channel_properties.flags & STORAGE_CHANNEL_SUPPORTS_MULTI_CHANNEL) @@ -889,6 +926,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, switch (SRB_STATUS(vm_srb->srb_status)) { case SRB_STATUS_ERROR: /* + * Let upper layer deal with error when + * sense message is present. + */ + + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) + break; + /* * If there is an error; offline the device since all * error recovery strategies would have already been * deployed on the host side. However, if the command @@ -953,6 +997,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, struct scsi_cmnd *scmnd = cmd_request->cmd; struct scsi_sense_hdr sense_hdr; struct vmscsi_request *vm_srb; + u32 data_transfer_length; struct Scsi_Host *host; u32 payload_sz = cmd_request->payload_sz; void *payload = cmd_request->payload; @@ -960,6 +1005,7 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, host = stor_dev->host; vm_srb = &cmd_request->vstor_packet.vm_srb; + data_transfer_length = vm_srb->data_transfer_length; scmnd->result = vm_srb->scsi_status; @@ -973,13 +1019,20 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request, &sense_hdr); } - if (vm_srb->srb_status != SRB_STATUS_SUCCESS) + if (vm_srb->srb_status != SRB_STATUS_SUCCESS) { storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, sense_hdr.ascq); + /* + * The Windows driver set data_transfer_length on + * SRB_STATUS_DATA_OVERRUN. On other errors, this value + * is untouched. In these cases we set it to 0. 
+ */ + if (vm_srb->srb_status != SRB_STATUS_DATA_OVERRUN) + data_transfer_length = 0; + } scsi_set_resid(scmnd, - cmd_request->payload->range.len - - vm_srb->data_transfer_length); + cmd_request->payload->range.len - data_transfer_length); scmnd->scsi_done(scmnd); @@ -1198,17 +1251,64 @@ static int storvsc_dev_remove(struct hv_device *device) /* Close the channel */ vmbus_close(device->channel); + kfree(stor_device->stor_chns); kfree(stor_device); return 0; } +static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device, + u16 q_num) +{ + u16 slot = 0; + u16 hash_qnum; + struct cpumask alloced_mask; + int num_channels, tgt_cpu; + + if (stor_device->num_sc == 0) + return stor_device->device->channel; + + /* + * Our channel array is sparsely populated and we + * initiated I/O on a processor/hw-q that does not + * currently have a designated channel. Fix this. + * The strategy is simple: + * I. Ensure NUMA locality + * II. Distribute evenly (best effort) + * III. Mapping is persistent. 
+ */ + + cpumask_and(&alloced_mask, &stor_device->alloced_cpus, + cpumask_of_node(cpu_to_node(q_num))); + + num_channels = cpumask_weight(&alloced_mask); + if (num_channels == 0) + return stor_device->device->channel; + + hash_qnum = q_num; + while (hash_qnum >= num_channels) + hash_qnum -= num_channels; + + for_each_cpu(tgt_cpu, &alloced_mask) { + if (slot == hash_qnum) + break; + slot++; + } + + stor_device->stor_chns[q_num] = stor_device->stor_chns[tgt_cpu]; + + return stor_device->stor_chns[q_num]; +} + + static int storvsc_do_io(struct hv_device *device, - struct storvsc_cmd_request *request) + struct storvsc_cmd_request *request, u16 q_num) { struct storvsc_device *stor_device; struct vstor_packet *vstor_packet; struct vmbus_channel *outgoing_channel; int ret = 0; + struct cpumask alloced_mask; + int tgt_cpu; vstor_packet = &request->vstor_packet; stor_device = get_out_stor_device(device); @@ -1222,7 +1322,26 @@ static int storvsc_do_io(struct hv_device *device, * Select an an appropriate channel to send the request out. */ - outgoing_channel = vmbus_get_outgoing_channel(device->channel); + if (stor_device->stor_chns[q_num] != NULL) { + outgoing_channel = stor_device->stor_chns[q_num]; + if (outgoing_channel->target_cpu == smp_processor_id()) { + /* + * Ideally, we want to pick a different channel if + * available on the same NUMA node. 
+ */ + cpumask_and(&alloced_mask, &stor_device->alloced_cpus, + cpumask_of_node(cpu_to_node(q_num))); + for_each_cpu(tgt_cpu, &alloced_mask) { + if (tgt_cpu != outgoing_channel->target_cpu) { + outgoing_channel = + stor_device->stor_chns[tgt_cpu]; + break; + } + } + } + } else { + outgoing_channel = get_og_chn(stor_device, q_num); + } vstor_packet->flags |= REQUEST_COMPLETION_FLAG; @@ -1267,8 +1386,6 @@ static int storvsc_do_io(struct hv_device *device, static int storvsc_device_configure(struct scsi_device *sdevice) { - blk_queue_max_segment_size(sdevice->request_queue, PAGE_SIZE); - blk_queue_bounce_limit(sdevice->request_queue, BLK_BOUNCE_ANY); blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); @@ -1451,6 +1568,13 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DISABLE_SYNCH_TRANSFER; + if (scmnd->device->tagged_supported) { + vm_srb->win8_extension.srb_flags |= + (SRB_FLAGS_QUEUE_ACTION_ENABLE | SRB_FLAGS_NO_QUEUE_FREEZE); + vm_srb->win8_extension.queue_tag = SP_UNTAGGED; + vm_srb->win8_extension.queue_action = SRB_SIMPLE_TAG_REQUEST; + } + /* Build the SRB */ switch (scmnd->sc_data_direction) { case DMA_TO_DEVICE: @@ -1511,20 +1635,14 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) page_to_pfn(sg_page((cur_sgl))); cur_sgl = sg_next(cur_sgl); } - - } else if (scsi_sglist(scmnd)) { - payload->range.len = length; - payload->range.offset = - virt_to_phys(scsi_sglist(scmnd)) & (PAGE_SIZE-1); - payload->range.pfn_array[0] = - virt_to_phys(scsi_sglist(scmnd)) >> PAGE_SHIFT; } cmd_request->payload = payload; cmd_request->payload_sz = payload_sz; /* Invokes the vsc to start an IO */ - ret = storvsc_do_io(dev, cmd_request); + ret = storvsc_do_io(dev, cmd_request, get_cpu()); + put_cpu(); if (ret == -EAGAIN) { /* no more space */ @@ -1550,6 +1668,7 @@ static struct scsi_host_template scsi_driver = { /* Make sure we dont get a sg segment 
crosses a page boundary */ .dma_boundary = PAGE_SIZE-1, .no_write_same = 1, + .track_queue_depth = 1, }; enum { @@ -1680,6 +1799,11 @@ static int storvsc_probe(struct hv_device *device, * from the host. */ host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); + /* + * Set the number of HW queues we are supporting. + */ + if (stor_device->num_sc != 0) + host->nr_hw_queues = stor_device->num_sc + 1; /* Register the HBA and start the scsi bus scan */ ret = scsi_add_host(host, &device->device); @@ -1716,6 +1840,7 @@ err_out2: goto err_out0; err_out1: + kfree(stor_device->stor_chns); kfree(stor_device); err_out0: @@ -1774,11 +1899,6 @@ static int __init storvsc_drv_init(void) fc_transport_template = fc_attach_transport(&fc_transport_functions); if (!fc_transport_template) return -ENODEV; - - /* - * Install Hyper-V specific timeout handler. - */ - fc_transport_template->eh_timed_out = storvsc_eh_timed_out; #endif ret = vmbus_driver_register(&storvsc_drv); |