diff options
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/channel.c | 36 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 262 | ||||
-rw-r--r-- | drivers/hv/connection.c | 20 | ||||
-rw-r--r-- | drivers/hv/hv.c | 36 | ||||
-rw-r--r-- | drivers/hv/hv_fcopy.c | 2 | ||||
-rw-r--r-- | drivers/hv/hv_kvp.c | 2 | ||||
-rw-r--r-- | drivers/hv/hv_snapshot.c | 2 | ||||
-rw-r--r-- | drivers/hv/hv_util.c | 1 | ||||
-rw-r--r-- | drivers/hv/hv_utils_transport.c | 3 | ||||
-rw-r--r-- | drivers/hv/hyperv_vmbus.h | 36 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 31 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 117 |
12 files changed, 377 insertions, 171 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 1161d68a1863..56dd261f7142 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -219,6 +219,21 @@ error0: } EXPORT_SYMBOL_GPL(vmbus_open); +/* Used for Hyper-V Socket: a guest client's connect() to the host */ +int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, + const uuid_le *shv_host_servie_id) +{ + struct vmbus_channel_tl_connect_request conn_msg; + + memset(&conn_msg, 0, sizeof(conn_msg)); + conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; + conn_msg.guest_endpoint_id = *shv_guest_servie_id; + conn_msg.host_service_id = *shv_host_servie_id; + + return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); +} +EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); + /* * create_gpadl_header - Creates a gpadl for the specified buffer */ @@ -624,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u64 aligned_data = 0; int ret; bool signal = false; + bool lock = channel->acquire_ring_lock; int num_vecs = ((bufferlen != 0) ? 3 : 1); @@ -643,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_len = (packetlen_aligned - packetlen); ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, - &signal); + &signal, lock); /* * Signalling the host is conditional on many factors: @@ -659,6 +675,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, * If we cannot write to the ring-buffer; signal the host * even if we may not have written anything. This is a rare * enough condition that it should not matter. + * NOTE: in this case, the hvsock channel is an exception, because + * it looks the host side's hvsock implementation has a throttling + * mechanism which can hurt the performance otherwise. */ if (channel->signal_policy) @@ -666,7 +685,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, else kick_q = true; - if (((ret == 0) && kick_q && signal) || (ret)) + if (((ret == 0) && kick_q && signal) || + (ret && !is_hvsock_channel(channel))) vmbus_setevent(channel); return ret; @@ -719,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; if (pagecount > MAX_PAGE_BUFFER_COUNT) return -EINVAL; @@ -755,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); /* * Signalling the host is conditional on many factors: @@ -818,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; packetlen = desc_size + bufferlen; packetlen_aligned = ALIGN(packetlen, sizeof(u64)); @@ -837,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); if (ret == 0 && signal) vmbus_setevent(channel); @@ -862,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, multi_pagebuffer->len); @@ -900,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); if (ret == 0 && signal) vmbus_setevent(channel); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1c1ad47042c5..38b682bab85a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -28,12 +28,127 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/completion.h> +#include <linux/delay.h> #include <linux/hyperv.h> #include "hyperv_vmbus.h" -static void init_vp_index(struct vmbus_channel *channel, - const uuid_le *type_guid); +static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); + +static const struct vmbus_device vmbus_devs[] = { + /* IDE */ + { .dev_type = HV_IDE, + HV_IDE_GUID, + .perf_device = true, + }, + + /* SCSI */ + { .dev_type = HV_SCSI, + HV_SCSI_GUID, + .perf_device = true, + }, + + /* Fibre Channel */ + { .dev_type = HV_FC, + HV_SYNTHFC_GUID, + .perf_device = true, + }, + + /* Synthetic NIC */ + { .dev_type = HV_NIC, + HV_NIC_GUID, + .perf_device = true, + }, + + /* Network Direct */ + { .dev_type = HV_ND, + HV_ND_GUID, + .perf_device = true, + }, + + /* PCIE */ + { .dev_type = HV_PCIE, + HV_PCIE_GUID, + .perf_device = true, + }, + + /* Synthetic Frame Buffer */ + { .dev_type = HV_FB, + HV_SYNTHVID_GUID, + .perf_device = false, + }, + + /* Synthetic Keyboard */ + { .dev_type = HV_KBD, + HV_KBD_GUID, + .perf_device = false, + }, + + /* Synthetic MOUSE */ + { .dev_type = HV_MOUSE, + HV_MOUSE_GUID, + .perf_device = false, + }, + + /* KVP */ + { .dev_type = HV_KVP, + HV_KVP_GUID, + .perf_device = false, + }, + + /* Time Synch */ + { .dev_type = HV_TS, + HV_TS_GUID, + .perf_device = false, + }, + + /* Heartbeat */ + { .dev_type = HV_HB, + HV_HEART_BEAT_GUID, + .perf_device = false, + }, + + /* Shutdown */ + { .dev_type = HV_SHUTDOWN, + HV_SHUTDOWN_GUID, + .perf_device = false, + }, + + /* File copy */ + { .dev_type = HV_FCOPY, + HV_FCOPY_GUID, + .perf_device = false, + }, + + /* Backup */ + { .dev_type = HV_BACKUP, + HV_VSS_GUID, + .perf_device = false, + }, + + /* Dynamic Memory */ + { .dev_type = HV_DM, + HV_DM_GUID, + .perf_device = false, + }, + + /* Unknown GUID */ + { .dev_type = HV_UNKOWN, + .perf_device = false, + }, +}; + +static u16 hv_get_dev_type(const uuid_le *guid) +{ + u16 i; + + for (i = HV_IDE; i < HV_UNKOWN; i++) { + if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) + return i; + } + pr_info("Unknown GUID: %pUl\n", guid); + return i; +} /** * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message @@ -144,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) return NULL; channel->id = atomic_inc_return(&chan_num); + channel->acquire_ring_lock = true; spin_lock_init(&channel->inbound_lock); spin_lock_init(&channel->lock); @@ -195,6 +311,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) vmbus_release_relid(relid); BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); if (channel->target_cpu != get_cpu()) { put_cpu(); @@ -206,9 +323,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) } if (channel->primary_channel == NULL) { - mutex_lock(&vmbus_connection.channel_mutex); list_del(&channel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); primary_channel = channel; } else { @@ -251,6 +366,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) struct vmbus_channel *channel; bool fnew = true; unsigned long flags; + u16 dev_type; + int ret; /* Make sure this is a new offer */ mutex_lock(&vmbus_connection.channel_mutex); @@ -288,7 +405,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) goto err_free_chan; } - init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); + dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); + + init_vp_index(newchannel, dev_type); if (newchannel->target_cpu != get_cpu()) { put_cpu(); @@ -325,12 +444,17 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) if (!newchannel->device_obj) goto err_deq_chan; + newchannel->device_obj->device_id = dev_type; /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ - if (vmbus_device_register(newchannel->device_obj) != 0) { + mutex_lock(&vmbus_connection.channel_mutex); + ret = vmbus_device_register(newchannel->device_obj); + mutex_unlock(&vmbus_connection.channel_mutex); + + if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); kfree(newchannel->device_obj); @@ -358,37 +482,6 @@ err_free_chan: free_channel(newchannel); } -enum { - IDE = 0, - SCSI, - FC, - NIC, - ND_NIC, - PCIE, - MAX_PERF_CHN, -}; - -/* - * This is an array of device_ids (device types) that are performance critical. - * We attempt to distribute the interrupt load for these devices across - * all available CPUs. - */ -static const struct hv_vmbus_device_id hp_devs[] = { - /* IDE */ - { HV_IDE_GUID, }, - /* Storage - SCSI */ - { HV_SCSI_GUID, }, - /* Storage - FC */ - { HV_SYNTHFC_GUID, }, - /* Network */ - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ - { HV_ND_GUID, }, - /* PCI Express Pass Through */ - { HV_PCIE_GUID, }, -}; - - /* * We use this state to statically distribute the channel interrupt load. */ @@ -405,22 +498,15 @@ static int next_numa_node_id; * For pre-win8 hosts or non-performance critical channels we assign the * first CPU in the first NUMA node. */ -static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) +static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) { u32 cur_cpu; - int i; - bool perf_chn = false; + bool perf_chn = vmbus_devs[dev_type].perf_device; struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; struct cpumask *alloced_mask; - for (i = IDE; i < MAX_PERF_CHN; i++) { - if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { - perf_chn = true; - break; - } - } if ((vmbus_proto_version == VERSION_WS2008) || (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { /* @@ -469,6 +555,17 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui cpumask_of_node(primary->numa_node)); cur_cpu = -1; + + /* + * Normally Hyper-V host doesn't create more subchannels than there + * are VCPUs on the node but it is possible when not all present VCPUs + * on the node are initialized by guest. Clear the alloced_cpus_in_node + * to start over. + */ + if (cpumask_equal(&primary->alloced_cpus_in_node, + cpumask_of_node(primary->numa_node))) + cpumask_clear(&primary->alloced_cpus_in_node); + while (true) { cur_cpu = cpumask_next(cur_cpu, &available_mask); if (cur_cpu >= nr_cpu_ids) { @@ -498,6 +595,32 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui channel->target_vp = hv_context.vp_index[cur_cpu]; } +static void vmbus_wait_for_unload(void) +{ + int cpu = smp_processor_id(); + void *page_addr = hv_context.synic_message_page[cpu]; + struct hv_message *msg = (struct hv_message *)page_addr + + VMBUS_MESSAGE_SINT; + struct vmbus_channel_message_header *hdr; + bool unloaded = false; + + while (1) { + if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) { + mdelay(10); + continue; + } + + hdr = (struct vmbus_channel_message_header *)msg->u.payload; + if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) + unloaded = true; + + vmbus_signal_eom(msg); + + if (unloaded) + break; + } +} + /* * vmbus_unload_response - Handler for the unload response. */ @@ -510,7 +633,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) complete(&vmbus_connection.unload_event); } -void vmbus_initiate_unload(void) +void vmbus_initiate_unload(bool crash) { struct vmbus_channel_message_header hdr; @@ -523,7 +646,14 @@ void vmbus_initiate_unload(void) hdr.msgtype = CHANNELMSG_UNLOAD; vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header)); - wait_for_completion(&vmbus_connection.unload_event); + /* + * vmbus_initiate_unload() is also called on crash and the crash can be + * happening in an interrupt context, where scheduling is impossible. + */ + if (!crash) + wait_for_completion(&vmbus_connection.unload_event); + else + vmbus_wait_for_unload(); } /* @@ -592,6 +722,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) struct device *dev; rescind = (struct vmbus_channel_rescind_offer *)hdr; + + mutex_lock(&vmbus_connection.channel_mutex); channel = relid2channel(rescind->child_relid); if (channel == NULL) { @@ -600,7 +732,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * vmbus_process_offer(), we have already invoked * vmbus_release_relid() on error. */ - return; + goto out; } spin_lock_irqsave(&channel->lock, flags); @@ -608,6 +740,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) spin_unlock_irqrestore(&channel->lock, flags); if (channel->device_obj) { + if (channel->chn_rescind_callback) { + channel->chn_rescind_callback(channel); + goto out; + } /* * We will have to unregister this device from the * driver core. @@ -621,7 +757,24 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) hv_process_channel_removal(channel, channel->offermsg.child_relid); } + +out: + mutex_unlock(&vmbus_connection.channel_mutex); +} + +void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) +{ + mutex_lock(&vmbus_connection.channel_mutex); + + BUG_ON(!is_hvsock_channel(channel)); + + channel->rescind = true; + vmbus_device_unregister(channel->device_obj); + + mutex_unlock(&vmbus_connection.channel_mutex); } +EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); + /* * vmbus_onoffers_delivered - @@ -825,6 +978,10 @@ struct vmbus_channel_message_table_entry {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, {CHANNELMSG_UNLOAD, 0, NULL}, {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, + {CHANNELMSG_18, 0, NULL}, + {CHANNELMSG_19, 0, NULL}, + {CHANNELMSG_20, 0, NULL}, + {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, }; /* @@ -973,3 +1130,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) return ret; } EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); + +void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, + void (*chn_rescind_cb)(struct vmbus_channel *)) +{ + channel->chn_rescind_callback = chn_rescind_cb; +} +EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 3dc5a9c7fad6..d02f1373dd98 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -88,8 +88,16 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, * This has been the behavior pre-win8. This is not * perf issue and having all channel messages delivered on CPU 0 * would be ok. + * For post win8 hosts, we support receiving channel messagges on + * all the CPUs. This is needed for kexec to work correctly where + * the CPU attempting to connect may not be CPU 0. */ - msg->target_vcpu = 0; + if (version >= VERSION_WIN8_1) { + msg->target_vcpu = hv_context.vp_index[get_cpu()]; + put_cpu(); + } else { + msg->target_vcpu = 0; + } /* * Add to list before we send the request since we may @@ -236,7 +244,7 @@ void vmbus_disconnect(void) /* * First send the unload request to the host. */ - vmbus_initiate_unload(); + vmbus_initiate_unload(false); if (vmbus_connection.work_queue) { drain_workqueue(vmbus_connection.work_queue); @@ -288,7 +296,8 @@ struct vmbus_channel *relid2channel(u32 relid) struct list_head *cur, *tmp; struct vmbus_channel *cur_sc; - mutex_lock(&vmbus_connection.channel_mutex); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->offermsg.child_relid == relid) { found_channel = channel; @@ -307,7 +316,6 @@ struct vmbus_channel *relid2channel(u32 relid) } } } - mutex_unlock(&vmbus_connection.channel_mutex); return found_channel; } @@ -474,7 +482,7 @@ int vmbus_post_msg(void *buffer, size_t buflen) /* * vmbus_set_event - Send an event notification to the parent */ -int vmbus_set_event(struct vmbus_channel *channel) +void vmbus_set_event(struct vmbus_channel *channel) { u32 child_relid = channel->offermsg.child_relid; @@ -485,5 +493,5 @@ int vmbus_set_event(struct vmbus_channel *channel) (child_relid >> 5)); } - return hv_signal_event(channel->sig_event); + hv_do_hypercall(HVCALL_SIGNAL_EVENT, channel->sig_event, NULL); } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 11bca51ef5ff..a1c086ba3b9a 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -204,6 +204,8 @@ int hv_init(void) sizeof(int) * NR_CPUS); memset(hv_context.event_dpc, 0, sizeof(void *) * NR_CPUS); + memset(hv_context.msg_dpc, 0, + sizeof(void *) * NR_CPUS); memset(hv_context.clk_evt, 0, sizeof(void *) * NR_CPUS); @@ -295,8 +297,14 @@ void hv_cleanup(void) * Cleanup the TSC page based CS. */ if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { - clocksource_change_rating(&hyperv_cs_tsc, 10); - clocksource_unregister(&hyperv_cs_tsc); + /* + * Crash can happen in an interrupt context and unregistering + * a clocksource is impossible and redundant in this case. + */ + if (!oops_in_progress) { + clocksource_change_rating(&hyperv_cs_tsc, 10); + clocksource_unregister(&hyperv_cs_tsc); + } hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); @@ -337,22 +345,6 @@ int hv_post_message(union hv_connection_id connection_id, return status & 0xFFFF; } - -/* - * hv_signal_event - - * Signal an event on the specified connection using the hypervisor event IPC. - * - * This involves a hypercall. - */ -int hv_signal_event(void *con_id) -{ - u64 status; - - status = hv_do_hypercall(HVCALL_SIGNAL_EVENT, con_id, NULL); - - return status & 0xFFFF; -} - static int hv_ce_set_next_event(unsigned long delta, struct clock_event_device *evt) { @@ -425,6 +417,13 @@ int hv_synic_alloc(void) } tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu); + hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC); + if (hv_context.msg_dpc[cpu] == NULL) { + pr_err("Unable to allocate event dpc\n"); + goto err; + } + tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu); + hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC); if (hv_context.clk_evt[cpu] == NULL) { pr_err("Unable to allocate clock event device\n"); @@ -466,6 +465,7 @@ err: static void hv_synic_free_cpu(int cpu) { kfree(hv_context.event_dpc[cpu]); + kfree(hv_context.msg_dpc[cpu]); kfree(hv_context.clk_evt[cpu]); if (hv_context.synic_event_page[cpu]) free_page((unsigned long)hv_context.synic_event_page[cpu]); diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index c37a71e13de0..23c70799ad8a 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -251,7 +251,6 @@ void hv_fcopy_onchannelcallback(void *context) */ fcopy_transaction.recv_len = recvlen; - fcopy_transaction.recv_channel = channel; fcopy_transaction.recv_req_id = requestid; fcopy_transaction.fcopy_msg = fcopy_msg; @@ -317,6 +316,7 @@ static void fcopy_on_reset(void) int hv_fcopy_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + fcopy_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index d4ab81bcd515..9b9b370fe22a 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -639,7 +639,6 @@ void hv_kvp_onchannelcallback(void *context) */ kvp_transaction.recv_len = recvlen; - kvp_transaction.recv_channel = channel; kvp_transaction.recv_req_id = requestid; kvp_transaction.kvp_msg = kvp_msg; @@ -688,6 +687,7 @@ int hv_kvp_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + kvp_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 67def4a831c8..3fba14e88f03 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -263,7 +263,6 @@ void hv_vss_onchannelcallback(void *context) */ vss_transaction.recv_len = recvlen; - vss_transaction.recv_channel = channel; vss_transaction.recv_req_id = requestid; vss_transaction.msg = (struct hv_vss_msg *)vss_msg; @@ -337,6 +336,7 @@ hv_vss_init(struct hv_util_service *srv) return -ENOTSUPP; } recv_buffer = srv->recv_buffer; + vss_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 7994ec2e4151..d5acaa2d8e61 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -322,6 +322,7 @@ static int util_probe(struct hv_device *dev, srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); if (!srv->recv_buffer) return -ENOMEM; + srv->channel = dev->channel; if (srv->util_init) { ret = srv->util_init(srv); if (ret) { diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index 4f42c0e20c20..9a9983fa4531 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -310,6 +310,9 @@ struct hvutil_transport *hvutil_transport_init(const char *name, return hvt; err_free_hvt: + spin_lock(&hvt_list_lock); + list_del(&hvt->list); + spin_unlock(&hvt_list_lock); kfree(hvt); return NULL; } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 2f8c0f40930b..12321b93a756 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -443,10 +443,11 @@ struct hv_context { u32 vp_index[NR_CPUS]; /* * Starting with win8, we can take channel interrupts on any CPU; - * we will manage the tasklet that handles events on a per CPU + * we will manage the tasklet that handles events messages on a per CPU * basis. */ struct tasklet_struct *event_dpc[NR_CPUS]; + struct tasklet_struct *msg_dpc[NR_CPUS]; /* * To optimize the mapping of relid to channel, maintain * per-cpu list of the channels based on their CPU affinity. @@ -495,8 +496,6 @@ extern int hv_post_message(union hv_connection_id connection_id, enum hv_message_type message_type, void *payload, size_t payload_size); -extern int hv_signal_event(void *con_id); - extern int hv_synic_alloc(void); extern void hv_synic_free(void); @@ -525,7 +524,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, struct kvec *kv_list, - u32 kv_count, bool *signal); + u32 kv_count, bool *signal, bool lock); int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, u32 buflen, u32 *buffer_actual_len, @@ -620,6 +619,30 @@ struct vmbus_channel_message_table_entry { extern struct vmbus_channel_message_table_entry channel_message_table[CHANNELMSG_COUNT]; +/* Free the message slot and signal end-of-message if required */ +static inline void vmbus_signal_eom(struct hv_message *msg) +{ + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } +} + /* General vmbus interface */ struct hv_device *vmbus_device_create(const uuid_le *type, @@ -644,9 +667,10 @@ void vmbus_disconnect(void); int vmbus_post_msg(void *buffer, size_t buflen); -int vmbus_set_event(struct vmbus_channel *channel); +void vmbus_set_event(struct vmbus_channel *channel); void vmbus_on_event(unsigned long data); +void vmbus_on_msg_dpc(unsigned long data); int hv_kvp_init(struct hv_util_service *); void hv_kvp_deinit(void); @@ -659,7 +683,7 @@ void hv_vss_onchannelcallback(void *); int hv_fcopy_init(struct hv_util_service *); void hv_fcopy_deinit(void); void hv_fcopy_onchannelcallback(void *); -void vmbus_initiate_unload(void); +void vmbus_initiate_unload(bool crash); static inline void hv_poll_channel(struct vmbus_channel *channel, void (*cb)(void *)) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index b53702ce692f..5613e2b5cff7 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -314,7 +314,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) /* Write to the ring buffer. */ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - struct kvec *kv_list, u32 kv_count, bool *signal) + struct kvec *kv_list, u32 kv_count, bool *signal, bool lock) { int i = 0; u32 bytes_avail_towrite; @@ -324,14 +324,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, u32 next_write_location; u32 old_write; u64 prev_indices = 0; - unsigned long flags; + unsigned long flags = 0; for (i = 0; i < kv_count; i++) totalbytes_towrite += kv_list[i].iov_len; totalbytes_towrite += sizeof(u64); - spin_lock_irqsave(&outring_info->ring_lock, flags); + if (lock) + spin_lock_irqsave(&outring_info->ring_lock, flags); hv_get_ringbuffer_availbytes(outring_info, &bytes_avail_toread, @@ -343,7 +344,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, * is empty since the read index == write index. */ if (bytes_avail_towrite <= totalbytes_towrite) { - spin_unlock_irqrestore(&outring_info->ring_lock, flags); + if (lock) + spin_unlock_irqrestore(&outring_info->ring_lock, flags); return -EAGAIN; } @@ -374,7 +376,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, hv_set_next_write_location(outring_info, next_write_location); - spin_unlock_irqrestore(&outring_info->ring_lock, flags); + if (lock) + spin_unlock_irqrestore(&outring_info->ring_lock, flags); *signal = hv_need_to_signal(old_write, outring_info); return 0; @@ -388,7 +391,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, u32 bytes_avail_toread; u32 next_read_location = 0; u64 prev_indices = 0; - unsigned long flags; struct vmpacket_descriptor desc; u32 offset; u32 packetlen; @@ -397,7 +399,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, if (buflen <= 0) return -EINVAL; - spin_lock_irqsave(&inring_info->ring_lock, flags); *buffer_actual_len = 0; *requestid = 0; @@ -412,7 +413,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, * No error is set when there is even no header, drivers are * supposed to analyze buffer_actual_len. */ - goto out_unlock; + return ret; } next_read_location = hv_get_next_read_location(inring_info); @@ -425,15 +426,11 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, *buffer_actual_len = packetlen; *requestid = desc.trans_id; - if (bytes_avail_toread < packetlen + offset) { - ret = -EAGAIN; - goto out_unlock; - } + if (bytes_avail_toread < packetlen + offset) + return -EAGAIN; - if (packetlen > buflen) { - ret = -ENOBUFS; - goto out_unlock; - } + if (packetlen > buflen) + return -ENOBUFS; next_read_location = hv_get_next_readlocation_withoffset(inring_info, offset); @@ -460,7 +457,5 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, *signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info); -out_unlock: - spin_unlock_irqrestore(&inring_info->ring_lock, flags); return ret; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 328e4c3808e0..64713ff47e36 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -45,7 +45,6 @@ static struct acpi_device *hv_acpi_dev; -static struct tasklet_struct msg_dpc; static struct completion probe_event; @@ -477,6 +476,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, } static DEVICE_ATTR_RO(channel_vp_mapping); +static ssize_t vendor_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->vendor_id); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t device_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->device_id); +} +static DEVICE_ATTR_RO(device); + /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ static struct attribute *vmbus_attrs[] = { &dev_attr_id.attr, @@ -502,6 +519,8 @@ static struct attribute *vmbus_attrs[] = { &dev_attr_in_read_bytes_avail.attr, &dev_attr_in_write_bytes_avail.attr, &dev_attr_channel_vp_mapping.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, NULL, }; ATTRIBUTE_GROUPS(vmbus); @@ -562,6 +581,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) struct hv_driver *drv = drv_to_hv_drv(driver); struct hv_device *hv_dev = device_to_hv_device(device); + /* The hv_sock driver handles all hv_sock offers. */ + if (is_hvsock_channel(hv_dev->channel)) + return drv->hvsock; + if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) return 1; @@ -685,28 +708,10 @@ static void hv_process_timer_expiration(struct hv_message *msg, int cpu) if (dev->event_handler) dev->event_handler(dev); - msg->header.message_type = HVMSG_NONE; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } + vmbus_signal_eom(msg); } -static void vmbus_on_msg_dpc(unsigned long data) +void vmbus_on_msg_dpc(unsigned long data) { int cpu = smp_processor_id(); void *page_addr = hv_context.synic_message_page[cpu]; @@ -716,52 +721,32 @@ static void vmbus_on_msg_dpc(unsigned long data) struct vmbus_channel_message_table_entry *entry; struct onmessage_work_context *ctx; - while (1) { - if (msg->header.message_type == HVMSG_NONE) - /* no msg */ - break; + if (msg->header.message_type == HVMSG_NONE) + /* no msg */ + return; - hdr = (struct vmbus_channel_message_header *)msg->u.payload; + hdr = (struct vmbus_channel_message_header *)msg->u.payload; - if (hdr->msgtype >= CHANNELMSG_COUNT) { - WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); - goto msg_handled; - } + if (hdr->msgtype >= CHANNELMSG_COUNT) { + WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); + goto msg_handled; + } - entry = &channel_message_table[hdr->msgtype]; - if (entry->handler_type == VMHT_BLOCKING) { - ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); - if (ctx == NULL) - continue; + entry = &channel_message_table[hdr->msgtype]; + if (entry->handler_type == VMHT_BLOCKING) { + ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); + if (ctx == NULL) + return; - INIT_WORK(&ctx->work, vmbus_onmessage_work); - memcpy(&ctx->msg, msg, sizeof(*msg)); + INIT_WORK(&ctx->work, vmbus_onmessage_work); + memcpy(&ctx->msg, msg, sizeof(*msg)); - queue_work(vmbus_connection.work_queue, &ctx->work); - } else - entry->message_handler(hdr); + queue_work(vmbus_connection.work_queue, &ctx->work); + } else + entry->message_handler(hdr); msg_handled: - msg->header.message_type = HVMSG_NONE; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } - } + vmbus_signal_eom(msg); } static void vmbus_isr(void) @@ -814,7 +799,7 @@ static void vmbus_isr(void) if (msg->header.message_type == HVMSG_TIMER_EXPIRED) hv_process_timer_expiration(msg, cpu); else - tasklet_schedule(&msg_dpc); + tasklet_schedule(hv_context.msg_dpc[cpu]); } } @@ -838,8 +823,6 @@ static int vmbus_bus_init(void) return ret; } - tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0); - ret = bus_register(&hv_bus); if (ret) goto err_cleanup; @@ -957,6 +940,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); memcpy(&child_device_obj->dev_instance, instance, sizeof(uuid_le)); + child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ return child_device_obj; @@ -1268,7 +1252,7 @@ static void hv_kexec_handler(void) int cpu; hv_synic_clockevents_cleanup(); - vmbus_initiate_unload(); + vmbus_initiate_unload(false); for_each_online_cpu(cpu) smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); hv_cleanup(); @@ -1276,7 +1260,7 @@ static void hv_kexec_handler(void) static void hv_crash_handler(struct pt_regs *regs) { - vmbus_initiate_unload(); + vmbus_initiate_unload(true); /* * In crash handler we can't schedule synic cleanup for all CPUs, * doing the cleanup for current CPU only. This should be sufficient @@ -1334,7 +1318,8 @@ static void __exit vmbus_exit(void) hv_synic_clockevents_cleanup(); vmbus_disconnect(); hv_remove_vmbus_irq(); - tasklet_kill(&msg_dpc); + for_each_online_cpu(cpu) + tasklet_kill(hv_context.msg_dpc[cpu]); vmbus_free_channels(); if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { unregister_die_notifier(&hyperv_die_block); |