summaryrefslogtreecommitdiffstats
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-25 20:17:34 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-25 20:17:34 +0100
commit4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch)
tree7d023baea59ed0886ded1f0b6d1c6385690b88f7 /drivers/net/tun.c
parentMerge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/ke... (diff)
parentnet/cxgb4: Fix referencing freed adapter (diff)
downloadlinux-4ba9920e5e9c0e16b5ed24292d45322907bb9035.tar.xz
linux-4ba9920e5e9c0e16b5ed24292d45322907bb9035.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) BPF debugger and asm tool by Daniel Borkmann. 2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann. 3) Correct reciprocal_divide and update users, from Hannes Frederic Sowa and Daniel Borkmann. 4) Currently we only have a "set" operation for the hw timestamp socket ioctl, add a "get" operation to match. From Ben Hutchings. 5) Add better trace events for debugging driver datapath problems, also from Ben Hutchings. 6) Implement auto corking in TCP, from Eric Dumazet. Basically, if we have a small send and a previous packet is already in the qdisc or device queue, defer until TX completion or we get more data. 7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko. 8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel Borkmann. 9) Share IP header compression code between Bluetooth and IEEE802154 layers, from Jukka Rissanen. 10) Fix ipv6 router reachability probing, from Jiri Benc. 11) Allow packets to be captured on macvtap devices, from Vlad Yasevich. 12) Support tunneling in GRO layer, from Jerry Chu. 13) Allow bonding to be configured fully using netlink, from Scott Feldman. 14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can already get the TCI. From Atzm Watanabe. 15) New "Heavy Hitter" qdisc, from Terry Lam. 16) Significantly improve the IPSEC support in pktgen, from Fan Du. 17) Allow ipv4 tunnels to cache routes, just like sockets. From Tom Herbert. 18) Add Proportional Integral Enhanced packet scheduler, from Vijay Subramanian. 19) Allow openvswitch to mmap'd netlink, from Thomas Graf. 20) Key TCP metrics blobs also by source address, not just destination address. From Christoph Paasch. 21) Support 10G in generic phylib. From Andy Fleming. 22) Try to short-circuit GRO flow compares using device provided RX hash, if provided. From Tom Herbert. The wireless and netfilter folks have been busy little bees too. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits) net/cxgb4: Fix referencing freed adapter ipv6: reallocate addrconf router for ipv6 address when lo device up fib_frontend: fix possible NULL pointer dereference rtnetlink: remove IFLA_BOND_SLAVE definition rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info qlcnic: update version to 5.3.55 qlcnic: Enhance logic to calculate msix vectors. qlcnic: Refactor interrupt coalescing code for all adapters. qlcnic: Update poll controller code path qlcnic: Interrupt code cleanup qlcnic: Enhance Tx timeout debugging. qlcnic: Use bool for rx_mac_learn. bonding: fix u64 division rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC sfc: Use the correct maximum TX DMA ring size for SFC9100 Add Shradha Shah as the sfc driver maintainer. net/vxlan: Share RX skb de-marking and checksum checks with ovs tulip: cleanup by using ARRAY_SIZE() ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called net/cxgb4: Don't retrieve stats during recovery ...
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--drivers/net/tun.c68
1 files changed, 51 insertions, 17 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ecec8029c5e8..bcf01af4b879 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -110,7 +110,7 @@ struct tap_filter {
unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
};
-/* DEFAULT_MAX_NUM_RSS_QUEUES were choosed to let the rx/tx queues allocated for
+/* DEFAULT_MAX_NUM_RSS_QUEUES were chosen to let the rx/tx queues allocated for
* the netdevice to be fit in one page. So we can make sure the success of
* memory allocation. TODO: increase the limit. */
#define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES
@@ -119,7 +119,7 @@ struct tap_filter {
#define TUN_FLOW_EXPIRE (3 * HZ)
/* A tun_file connects an open character device to a tuntap netdevice. It
- * also contains all socket related strctures (except sock_fprog and tap_filter)
+ * also contains all socket related structures (except sock_fprog and tap_filter)
* to serve as one transmit queue for tuntap device. The sock_fprog and
* tap_filter were kept in tun_struct since they were used for filtering for the
* netdevice not for a specific queue (at least I didn't see the requirement for
@@ -152,6 +152,7 @@ struct tun_flow_entry {
struct tun_struct *tun;
u32 rxhash;
+ u32 rps_rxhash;
int queue_index;
unsigned long updated;
};
@@ -220,6 +221,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
rxhash, queue_index);
e->updated = jiffies;
e->rxhash = rxhash;
+ e->rps_rxhash = 0;
e->queue_index = queue_index;
e->tun = tun;
hlist_add_head_rcu(&e->hash_link, head);
@@ -232,6 +234,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
{
tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
e->rxhash, e->queue_index);
+ sock_rps_reset_flow_hash(e->rps_rxhash);
hlist_del_rcu(&e->hash_link);
kfree_rcu(e, rcu);
--tun->flow_count;
@@ -325,6 +328,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
/* TODO: keep queueing to old queue until it's empty? */
e->queue_index = queue_index;
e->updated = jiffies;
+ sock_rps_record_flow_hash(e->rps_rxhash);
} else {
spin_lock_bh(&tun->lock);
if (!tun_flow_find(head, rxhash) &&
@@ -341,8 +345,20 @@ unlock:
rcu_read_unlock();
}
+/**
+ * Save the hash received in the stack receive path and update the
+ * flow_hash table accordingly.
+ */
+static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
+{
+ if (unlikely(e->rps_rxhash != hash)) {
+ sock_rps_reset_flow_hash(e->rps_rxhash);
+ e->rps_rxhash = hash;
+ }
+}
+
/* We try to identify a flow through its rxhash first. The reason that
- * we do not check rxq no. is becuase some cards(e.g 82599), chooses
+ * we do not check rxq no. is because some cards(e.g 82599), chooses
* the rxq based on the txq where the last packet of the flow comes. As
* the userspace application move between processors, we may get a
* different rxq no. here. If we could not get rxhash, then we would
@@ -359,12 +375,13 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
rcu_read_lock();
numqueues = ACCESS_ONCE(tun->numqueues);
- txq = skb_get_rxhash(skb);
+ txq = skb_get_hash(skb);
if (txq) {
e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
- if (e)
+ if (e) {
+ tun_flow_save_rps_rxhash(e, txq);
txq = e->queue_index;
- else
+ } else
/* use multiply and shift instead of expensive divide */
txq = ((u64)txq * numqueues) >> 32;
} else if (likely(skb_rx_queue_recorded(skb))) {
@@ -532,7 +549,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
err = 0;
- /* Re-attach the filter to presist device */
+ /* Re-attach the filter to persist device */
if (!skip_filter && (tun->filter_attached == true)) {
err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
if (!err)
@@ -721,14 +738,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
struct tun_struct *tun = netdev_priv(dev);
int txq = skb->queue_mapping;
struct tun_file *tfile;
+ u32 numqueues = 0;
rcu_read_lock();
tfile = rcu_dereference(tun->tfiles[txq]);
+ numqueues = ACCESS_ONCE(tun->numqueues);
/* Drop packet if interface is not attached */
- if (txq >= tun->numqueues)
+ if (txq >= numqueues)
goto drop;
+ if (numqueues == 1) {
+ /* Select queue was not called for the skbuff, so we extract the
+ * RPS hash and save it into the flow_table here.
+ */
+ __u32 rxhash;
+
+ rxhash = skb_get_hash(skb);
+ if (rxhash) {
+ struct tun_flow_entry *e;
+ e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
+ rxhash);
+ if (e)
+ tun_flow_save_rps_rxhash(e, rxhash);
+ }
+ }
+
tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
BUG_ON(!tfile);
@@ -746,8 +781,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Limit the number of packets queued by dividing txq length with the
* number of queues.
*/
- if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
- >= dev->tx_queue_len / tun->numqueues)
+ if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
+ >= dev->tx_queue_len)
goto drop;
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
@@ -820,9 +855,9 @@ static void tun_poll_controller(struct net_device *dev)
* Tun only receives frames when:
* 1) the char device endpoint gets data from user space
* 2) the tun socket gets a sendmsg call from user space
- * Since both of those are syncronous operations, we are guaranteed
+ * Since both of those are synchronous operations, we are guaranteed
* never to have pending data when we poll for it
- * so theres nothing to do here but return.
+ * so there is nothing to do here but return.
* We need this though so netpoll recognizes us as an interface that
* supports polling, which enables bridge devices in virt setups to
* still use netconsole
@@ -1147,7 +1182,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
skb_reset_network_header(skb);
skb_probe_transport_header(skb, 0);
- rxhash = skb_get_rxhash(skb);
+ rxhash = skb_get_hash(skb);
netif_rx_ni(skb);
tun->dev->stats.rx_packets++;
@@ -1292,8 +1327,7 @@ done:
}
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
- struct kiocb *iocb, const struct iovec *iv,
- ssize_t len, int noblock)
+ const struct iovec *iv, ssize_t len, int noblock)
{
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
@@ -1356,7 +1390,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
goto out;
}
- ret = tun_do_read(tun, tfile, iocb, iv, len,
+ ret = tun_do_read(tun, tfile, iv, len,
file->f_flags & O_NONBLOCK);
ret = min_t(ssize_t, ret, len);
if (ret > 0)
@@ -1457,7 +1491,7 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
SOL_PACKET, TUN_TX_TIMESTAMP);
goto out;
}
- ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len,
+ ret = tun_do_read(tun, tfile, m->msg_iov, total_len,
flags & MSG_DONTWAIT);
if (ret > total_len) {
m->msg_flags |= MSG_TRUNC;