diff options
Diffstat (limited to 'drivers/net/hyperv/netvsc_drv.c')
-rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 588 |
1 files changed, 351 insertions, 237 deletions
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index fcab8019dda0..bc05c895d958 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,21 +42,11 @@ #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) -#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ - NETIF_F_SG | \ - NETIF_F_TSO | \ - NETIF_F_TSO6 | \ - NETIF_F_HW_CSUM) - -/* Restrict GSO size to account for NVGRE */ -#define NETVSC_GSO_MAX_SIZE 62768 static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); -static int max_num_vrss_chns = 8; - static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR | @@ -145,7 +135,7 @@ static int netvsc_close(struct net_device *net) while (true) { aread = 0; for (i = 0; i < nvdev->num_chn; i++) { - chn = nvdev->chn_table[i]; + chn = nvdev->chan_table[i].channel; if (!chn) continue; @@ -201,22 +191,41 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, return ppi; } +/* + * Select queue for transmit. + * + * If a valid queue has already been assigned, then use that. + * Otherwise compute tx queue based on hash and the send table. + * + * This is basically similar to default (__netdev_pick_tx) with the added step + * of using the host send_table when no other queue has been assigned. + * + * TODO support XPS - but get_xps_queue not exported + */ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct netvsc_device *nvsc_dev = net_device_ctx->nvdev; - u32 hash; - u16 q_idx = 0; + struct sock *sk = skb->sk; + int q_idx = sk_tx_queue_get(sk); - if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) - return 0; + if (q_idx < 0 || skb->ooo_okay || + q_idx >= ndev->real_num_tx_queues) { + u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE); + int new_idx; - hash = skb_get_hash(skb); - q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % - ndev->real_num_tx_queues; + new_idx = nvsc_dev->send_table[hash] + % nvsc_dev->num_chn; + + if (q_idx != new_idx && sk && + sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, new_idx); + + q_idx = new_idx; + } - if (!nvsc_dev->chn_table[q_idx]) + if (unlikely(!nvsc_dev->chan_table[q_idx].channel)) q_idx = 0; return q_idx; @@ -323,33 +332,25 @@ static int netvsc_get_slots(struct sk_buff *skb) return slots + frag_slots; } -static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off) +static u32 net_checksum_info(struct sk_buff *skb) { - u32 ret_val = TRANSPORT_INFO_NOT_IP; + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *ip = ip_hdr(skb); - if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) && - (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) { - goto not_ip; - } - - *trans_off = skb_transport_offset(skb); - - if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) { - struct iphdr *iphdr = ip_hdr(skb); - - if (iphdr->protocol == IPPROTO_TCP) - ret_val = TRANSPORT_INFO_IPV4_TCP; - else if (iphdr->protocol == IPPROTO_UDP) - ret_val = TRANSPORT_INFO_IPV4_UDP; + if (ip->protocol == IPPROTO_TCP) + return TRANSPORT_INFO_IPV4_TCP; + else if (ip->protocol == IPPROTO_UDP) + return TRANSPORT_INFO_IPV4_UDP; } else { - if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) - ret_val = TRANSPORT_INFO_IPV6_TCP; + struct ipv6hdr *ip6 = ipv6_hdr(skb); + + if (ip6->nexthdr == IPPROTO_TCP) + return TRANSPORT_INFO_IPV6_TCP; else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) - ret_val = TRANSPORT_INFO_IPV6_UDP; + return TRANSPORT_INFO_IPV6_UDP; } -not_ip: - return ret_val; + return TRANSPORT_INFO_NOT_IP; } static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) @@ -362,11 +363,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct rndis_packet *rndis_pkt; u32 rndis_msg_size; struct rndis_per_packet_info *ppi; - struct ndis_tcp_ip_checksum_info *csum_info; - int hdr_offset; - u32 net_trans_info; u32 hash; - u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; @@ -376,7 +373,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * more pages we try linearizing it. */ - skb_length = skb->len; num_data_pgs = netvsc_get_slots(skb) + 2; if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { @@ -409,6 +405,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) packet->q_idx = skb_get_queue_mapping(skb); packet->total_data_buflen = skb->len; + packet->total_bytes = skb->len; + packet->total_packets = 1; rndis_msg = (struct rndis_message *)skb->head; @@ -445,13 +443,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) VLAN_PRIO_SHIFT; } - net_trans_info = get_net_transport_info(skb, &hdr_offset); - - /* - * Setup the sendside checksum offload only if this is not a - * GSO packet. - */ - if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) { + if (skb_is_gso(skb)) { struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; @@ -462,7 +454,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ppi->ppi_offset); lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; - if (net_trans_info & (INFO_IPV4 << 16)) { + if (skb->protocol == htons(ETH_P_IP)) { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip_hdr(skb)->tot_len = 0; @@ -478,10 +470,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } - lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; + lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb); lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (net_trans_info & INFO_TCP) { + if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) { + struct ndis_tcp_ip_checksum_info *csum_info; + rndis_msg_size += NDIS_CSUM_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, TCPIP_CHKSUM_PKTINFO); @@ -489,15 +483,25 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + ppi->ppi_offset); - if (net_trans_info & (INFO_IPV4 << 16)) + csum_info->transmit.tcp_header_offset = skb_transport_offset(skb); + + if (skb->protocol == htons(ETH_P_IP)) { csum_info->transmit.is_ipv4 = 1; - else + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + csum_info->transmit.tcp_checksum = 1; + else + csum_info->transmit.udp_checksum = 1; + } else { csum_info->transmit.is_ipv6 = 1; - csum_info->transmit.tcp_checksum = 1; - csum_info->transmit.tcp_header_offset = hdr_offset; + if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + csum_info->transmit.tcp_checksum = 1; + else + csum_info->transmit.udp_checksum = 1; + } } else { - /* UDP checksum (and other) offload is not supported. */ + /* Can't do offload of this type of checksum */ if (skb_checksum_help(skb)) goto drop; } @@ -513,15 +517,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); - if (likely(ret == 0)) { - struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); - - u64_stats_update_begin(&tx_stats->syncp); - tx_stats->packets++; - tx_stats->bytes += skb_length; - u64_stats_update_end(&tx_stats->syncp); + if (likely(ret == 0)) return NETDEV_TX_OK; - } if (ret == -EAGAIN) { ++net_device_ctx->eth_stats.tx_busy; @@ -541,7 +538,6 @@ no_memory: ++net_device_ctx->eth_stats.tx_no_memory; goto drop; } - /* * netvsc_linkstatus_callback - Link up/down notification */ @@ -593,13 +589,13 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, } static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, - struct hv_netvsc_packet *packet, - struct ndis_tcp_ip_checksum_info *csum_info, - void *data, u16 vlan_tci) + const struct ndis_tcp_ip_checksum_info *csum_info, + const struct ndis_pkt_8021q_info *vlan, + void *data, u32 buflen) { struct sk_buff *skb; - skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); + skb = netdev_alloc_skb_ip_align(net, buflen); if (!skb) return skb; @@ -607,8 +603,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, * Copy to skb. This copy is needed here since the memory pointed by * hv_netvsc_packet cannot be deallocated */ - memcpy(skb_put(skb, packet->total_data_buflen), data, - packet->total_data_buflen); + memcpy(skb_put(skb, buflen), data, buflen); skb->protocol = eth_type_trans(skb, net); @@ -625,9 +620,12 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (vlan_tci & VLAN_TAG_PRESENT) + if (vlan) { + u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } return skb; } @@ -636,18 +634,19 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, * netvsc_recv_callback - Callback when we receive a packet from the * "wire" on the specified device. */ -int netvsc_recv_callback(struct hv_device *device_obj, - struct hv_netvsc_packet *packet, - void **data, - struct ndis_tcp_ip_checksum_info *csum_info, - struct vmbus_channel *channel, - u16 vlan_tci) +int netvsc_recv_callback(struct net_device *net, + struct vmbus_channel *channel, + void *data, u32 len, + const struct ndis_tcp_ip_checksum_info *csum_info, + const struct ndis_pkt_8021q_info *vlan) { - struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); + struct netvsc_device *net_device = net_device_ctx->nvdev; struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; + u16 q_idx = channel->offermsg.offer.sub_channel_index; + if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; @@ -665,7 +664,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ - skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); + skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len); if (unlikely(!skb)) { ++net->stats.rx_dropped; rcu_read_unlock(); @@ -673,18 +672,17 @@ int netvsc_recv_callback(struct hv_device *device_obj, } if (net != vf_netdev) - skb_record_rx_queue(skb, - channel->offermsg.offer.sub_channel_index); + skb_record_rx_queue(skb, q_idx); /* * Even if injecting the packet, record the statistics * on the synthetic device because modifying the VF device * statistics will not work correctly. */ - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + rx_stats = &net_device->chan_table[q_idx].rx_stats; u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; - rx_stats->bytes += packet->total_data_buflen; + rx_stats->bytes += len; if (skb->pkt_type == PACKET_BROADCAST) ++rx_stats->broadcast; @@ -697,7 +695,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, * is done. * TODO - use NAPI? */ - netif_rx(skb); + netif_receive_skb(skb); rcu_read_unlock(); return 0; @@ -722,102 +720,76 @@ static void netvsc_get_channels(struct net_device *net, } } +static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, + u32 num_chn) +{ + struct netvsc_device_info device_info; + int ret; + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = num_chn; + device_info.ring_size = ring_size; + device_info.max_num_vrss_chns = num_chn; + + ret = rndis_filter_device_add(dev, &device_info); + if (ret) + return ret; + + ret = netif_set_real_num_tx_queues(net, num_chn); + if (ret) + return ret; + + ret = netif_set_real_num_rx_queues(net, num_chn); + + return ret; +} + static int netvsc_set_channels(struct net_device *net, struct ethtool_channels *channels) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = net_device_ctx->nvdev; - struct netvsc_device_info device_info; - u32 num_chn; - u32 max_chn; - int ret = 0; - bool recovering = false; + unsigned int count = channels->combined_count; + int ret; + + /* We do not support separate count for rx, tx, or other */ + if (count == 0 || + channels->rx_count || channels->tx_count || channels->other_count) + return -EINVAL; + + if (count > net->num_tx_queues || count > net->num_rx_queues) + return -EINVAL; if (net_device_ctx->start_remove || !nvdev || nvdev->destroy) return -ENODEV; - num_chn = nvdev->num_chn; - max_chn = min_t(u32, nvdev->max_chn, num_online_cpus()); - - if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) { - pr_info("vRSS unsupported before NVSP Version 5\n"); + if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) return -EINVAL; - } - /* We do not support rx, tx, or other */ - if (!channels || - channels->rx_count || - channels->tx_count || - channels->other_count || - (channels->combined_count < 1)) + if (count > nvdev->max_chn) return -EINVAL; - if (channels->combined_count > max_chn) { - pr_info("combined channels too high, using %d\n", max_chn); - channels->combined_count = max_chn; - } - ret = netvsc_close(net); if (ret) - goto out; + return ret; - do_set: net_device_ctx->start_remove = true; - rndis_filter_device_remove(dev); - - nvdev->num_chn = channels->combined_count; + rndis_filter_device_remove(dev, nvdev); - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = nvdev->num_chn; /* passed to RNDIS */ - device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = max_num_vrss_chns; - - ret = rndis_filter_device_add(dev, &device_info); - if (ret) { - if (recovering) { - netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - return ret; - } - goto recover; - } - - nvdev = net_device_ctx->nvdev; - - ret = netif_set_real_num_tx_queues(net, nvdev->num_chn); - if (ret) { - if (recovering) { - netdev_err(net, "could not set tx queue count (ret %d)\n", ret); - return ret; - } - goto recover; - } - - ret = netif_set_real_num_rx_queues(net, nvdev->num_chn); - if (ret) { - if (recovering) { - netdev_err(net, "could not set rx queue count (ret %d)\n", ret); - return ret; - } - goto recover; - } + ret = netvsc_set_queues(net, dev, count); + if (ret == 0) + nvdev->num_chn = count; + else + netvsc_set_queues(net, dev, nvdev->num_chn); - out: netvsc_open(net); net_device_ctx->start_remove = false; + /* We may have missed link change notifications */ schedule_delayed_work(&net_device_ctx->dwork, 0); return ret; - - recover: - /* If the above failed, we attempt to recover through the same - * process but with the original number of channels. - */ - netdev_err(net, "could not set channels, recovering\n"); - recovering = true; - channels->combined_count = num_chn; - goto do_set; } static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd) @@ -878,8 +850,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = ndevctx->nvdev; struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; - u32 num_chn; - int ret = 0; + int ret; if (ndevctx->start_remove || !nvdev || nvdev->destroy) return -ENODEV; @@ -888,17 +859,22 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto out; - num_chn = nvdev->num_chn; + memset(&device_info, 0, sizeof(device_info)); + device_info.ring_size = ring_size; + device_info.num_chn = nvdev->num_chn; + device_info.max_num_vrss_chns = nvdev->num_chn; ndevctx->start_remove = true; - rndis_filter_device_remove(hdev); + rndis_filter_device_remove(hdev, nvdev); + + /* 'nvdev' has been freed in rndis_filter_device_remove() -> + * netvsc_device_remove () -> free_netvsc_device(). + * We mustn't access it before it's re-created in + * rndis_filter_device_add() -> netvsc_device_add(). + */ ndev->mtu = mtu; - memset(&device_info, 0, sizeof(device_info)); - device_info.ring_size = ring_size; - device_info.num_chn = num_chn; - device_info.max_num_vrss_chns = max_num_vrss_chns; rndis_filter_device_add(hdev, &device_info); out: @@ -911,47 +887,50 @@ out: return ret; } -static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, - struct rtnl_link_stats64 *t) +static void netvsc_get_stats64(struct net_device *net, + struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - int cpu; - - for_each_possible_cpu(cpu) { - struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats, - cpu); - struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, - cpu); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast; + struct netvsc_device *nvdev = ndev_ctx->nvdev; + int i; + + if (!nvdev) + return; + + for (i = 0; i < nvdev->num_chn; i++) { + const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; + const struct netvsc_stats *stats; + u64 packets, bytes, multicast; unsigned int start; + stats = &nvchan->tx_stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); - tx_packets = tx_stats->packets; - tx_bytes = tx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + t->tx_bytes += bytes; + t->tx_packets += packets; + stats = &nvchan->rx_stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); - rx_packets = rx_stats->packets; - rx_bytes = rx_stats->bytes; - rx_multicast = rx_stats->multicast + rx_stats->broadcast; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); - - t->tx_bytes += tx_bytes; - t->tx_packets += tx_packets; - t->rx_bytes += rx_bytes; - t->rx_packets += rx_packets; - t->multicast += rx_multicast; + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + multicast = stats->multicast + stats->broadcast; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + t->rx_bytes += bytes; + t->rx_packets += packets; + t->multicast += multicast; } t->tx_dropped = net->stats.tx_dropped; - t->tx_errors = net->stats.tx_dropped; + t->tx_errors = net->stats.tx_errors; t->rx_dropped = net->stats.rx_dropped; t->rx_errors = net->stats.rx_errors; - - return t; } static int netvsc_set_mac_addr(struct net_device *ndev, void *p) @@ -989,11 +968,19 @@ static const struct { { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, }; +#define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats) + +/* 4 statistics per queue (rx/tx packets/bytes) */ +#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) + static int netvsc_get_sset_count(struct net_device *dev, int string_set) { + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; + switch (string_set) { case ETH_SS_STATS: - return ARRAY_SIZE(netvsc_stats); + return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev); default: return -EINVAL; } @@ -1003,26 +990,109 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; const void *nds = &ndc->eth_stats; - int i; + const struct netvsc_stats *qstats; + unsigned int start; + u64 packets, bytes; + int i, j; - for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) + for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++) data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); + + for (j = 0; j < nvdev->num_chn; j++) { + qstats = &nvdev->chan_table[j].tx_stats; + + do { + start = u64_stats_fetch_begin_irq(&qstats->syncp); + packets = qstats->packets; + bytes = qstats->bytes; + } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); + data[i++] = packets; + data[i++] = bytes; + + qstats = &nvdev->chan_table[j].rx_stats; + do { + start = u64_stats_fetch_begin_irq(&qstats->syncp); + packets = qstats->packets; + bytes = qstats->bytes; + } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); + data[i++] = packets; + data[i++] = bytes; + } } static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) { + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; + u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) - memcpy(data + i * ETH_GSTRING_LEN, + memcpy(p + i * ETH_GSTRING_LEN, netvsc_stats[i].name, ETH_GSTRING_LEN); + + p += i * ETH_GSTRING_LEN; + for (i = 0; i < nvdev->num_chn; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + break; } } +static int +netvsc_get_rss_hash_opts(struct netvsc_device *nvdev, + struct ethtool_rxnfc *info) +{ + info->data = RXH_IP_SRC | RXH_IP_DST; + + switch (info->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + /* fallthrough */ + case UDP_V4_FLOW: + case UDP_V6_FLOW: + case IPV4_FLOW: + case IPV6_FLOW: + break; + default: + info->data = 0; + break; + } + + return 0; +} + +static int +netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, + u32 *rules) +{ + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = nvdev->num_chn; + return 0; + + case ETHTOOL_GRXFH: + return netvsc_get_rss_hash_opts(nvdev, info); + } + return -EOPNOTSUPP; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void netvsc_poll_controller(struct net_device *net) { @@ -1032,6 +1102,68 @@ static void netvsc_poll_controller(struct net_device *net) } #endif +static u32 netvsc_get_rxfh_key_size(struct net_device *dev) +{ + return NETVSC_HASH_KEYLEN; +} + +static u32 netvsc_rss_indir_size(struct net_device *dev) +{ + return ITAB_NUM; +} + +static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *ndev = ndc->nvdev; + struct rndis_device *rndis_dev = ndev->extension; + int i; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ + + if (indir) { + for (i = 0; i < ITAB_NUM; i++) + indir[i] = rndis_dev->ind_table[i]; + } + + if (key) + memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN); + + return 0; +} + +static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *ndev = ndc->nvdev; + struct rndis_device *rndis_dev = ndev->extension; + int i; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (indir) { + for (i = 0; i < ITAB_NUM; i++) + if (indir[i] >= dev->num_rx_queues) + return -EINVAL; + + for (i = 0; i < ITAB_NUM; i++) + rndis_dev->ind_table[i] = indir[i]; + } + + if (!key) { + if (!indir) + return 0; + + key = rndis_dev->rss_key; + } + + return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn); +} + static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1043,6 +1175,11 @@ static const struct ethtool_ops ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_settings = netvsc_get_settings, .set_settings = netvsc_set_settings, + .get_rxnfc = netvsc_get_rxnfc, + .get_rxfh_key_size = netvsc_get_rxfh_key_size, + .get_rxfh_indir_size = netvsc_rss_indir_size, + .get_rxfh = netvsc_get_rxfh, + .set_rxfh = netvsc_set_rxfh, }; static const struct net_device_ops device_ops = { @@ -1163,15 +1300,6 @@ out_unlock: rtnl_unlock(); } -static void netvsc_free_netdev(struct net_device *netdev) -{ - struct net_device_context *net_device_ctx = netdev_priv(netdev); - - free_percpu(net_device_ctx->tx_stats); - free_percpu(net_device_ctx->rx_stats); - free_netdev(netdev); -} - static struct net_device *get_netvsc_bymac(const u8 *mac) { struct net_device *dev; @@ -1308,7 +1436,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev) static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device *ndev; - struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; ndev = get_netvsc_byref(vf_netdev); @@ -1316,7 +1443,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); - netvsc_dev = net_device_ctx->nvdev; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); @@ -1336,7 +1462,7 @@ static int netvsc_probe(struct hv_device *dev, int ret; net = alloc_etherdev_mq(sizeof(struct net_device_context), - num_online_cpus()); + VRSS_CHANNEL_MAX); if (!net) return -ENOMEM; @@ -1351,18 +1477,6 @@ static int netvsc_probe(struct hv_device *dev, netdev_dbg(net, "netvsc msg_enable: %d\n", net_device_ctx->msg_enable); - net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats); - if (!net_device_ctx->tx_stats) { - free_netdev(net); - return -ENOMEM; - } - net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats); - if (!net_device_ctx->rx_stats) { - free_percpu(net_device_ctx->tx_stats); - free_netdev(net); - return -ENOMEM; - } - hv_set_drvdata(dev, net); net_device_ctx->start_remove = false; @@ -1374,10 +1488,6 @@ static int netvsc_probe(struct hv_device *dev, INIT_LIST_HEAD(&net_device_ctx->reconfig_events); net->netdev_ops = &device_ops; - - net->hw_features = NETVSC_HW_FEATURES; - net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; - net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); @@ -1387,20 +1497,26 @@ static int netvsc_probe(struct hv_device *dev, /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = max_num_vrss_chns; + device_info.max_num_vrss_chns = min_t(u32, VRSS_CHANNEL_DEFAULT, + num_online_cpus()); ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - netvsc_free_netdev(net); + free_netdev(net); hv_set_drvdata(dev, NULL); return ret; } memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + /* hw_features computed in rndis_filter_device_add */ + net->features = net->hw_features | + NETIF_F_HIGHDMA | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + net->vlan_features = net->features; + nvdev = net_device_ctx->nvdev; netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); - netif_set_gso_max_size(net, NETVSC_GSO_MAX_SIZE); /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; @@ -1412,8 +1528,8 @@ static int netvsc_probe(struct hv_device *dev, ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); - rndis_filter_device_remove(dev); - netvsc_free_netdev(net); + rndis_filter_device_remove(dev, nvdev); + free_netdev(net); } return ret; @@ -1423,7 +1539,6 @@ static int netvsc_remove(struct hv_device *dev) { struct net_device *net; struct net_device_context *ndev_ctx; - struct netvsc_device *net_device; net = hv_get_drvdata(dev); @@ -1433,7 +1548,6 @@ static int netvsc_remove(struct hv_device *dev) } ndev_ctx = netdev_priv(net); - net_device = ndev_ctx->nvdev; /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels() * removing the device. @@ -1454,11 +1568,11 @@ static int netvsc_remove(struct hv_device *dev) * Call to the vsc driver to let it know that the device is being * removed */ - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, ndev_ctx->nvdev); hv_set_drvdata(dev, NULL); - netvsc_free_netdev(net); + free_netdev(net); return 0; } @@ -1498,7 +1612,7 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; /* Avoid Vlan dev with same MAC registering as VF */ - if (event_dev->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(event_dev)) return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ |