diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-06-22 07:23:35 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-06-22 07:23:35 +0200 |
commit | c356dc4b540edd6c02b409dd8cf3208ba2804c38 (patch) | |
tree | 457d971da033bfb11c85aaee260d9811937fa2c4 /net | |
parent | Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma (diff) | |
parent | tcp: refine memory limit test in tcp_fragment() (diff) | |
download | linux-c356dc4b540edd6c02b409dd8cf3208ba2804c38.tar.xz linux-c356dc4b540edd6c02b409dd8cf3208ba2804c38.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix leak of unqueued fragments in ipv6 nf_defrag, from Guillaume
Nault.
2) Don't access the DDM interface unless the transceiver implements it
in bnx2x, from Mauro S. M. Rodrigues.
3) Don't double fetch 'len' from userspace in sock_getsockopt(), from
JingYi Hou.
4) Sign extension overflow in lio_core, from Colin Ian King.
5) Various netem bug fixes wrt. corrupted packets from Jakub Kicinski.
6) Fix epollout hang in hvsock, from Sunil Muthuswamy.
7) Fix regression in default fib6_type, from David Ahern.
8) Handle memory limits in tcp_fragment more appropriately, from Eric
Dumazet.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (24 commits)
tcp: refine memory limit test in tcp_fragment()
inet: clear num_timeout reqsk_alloc()
net: mvpp2: debugfs: Add pmap to fs dump
ipv6: Default fib6_type to RTN_UNICAST when not set
net: hns3: Fix inconsistent indenting
net/af_iucv: always register net_device notifier
net/af_iucv: build proper skbs for HiperTransport
net/af_iucv: remove GFP_DMA restriction for HiperTransport
net: dsa: mv88e6xxx: fix shift of FID bits in mv88e6185_g1_vtu_loadpurge()
hvsock: fix epollout hang from race condition
net/udp_gso: Allow TX timestamp with UDP GSO
net: netem: fix use after free and double free with packet corruption
net: netem: fix backlog accounting for corrupted GSO frames
net: lio_core: fix potential sign-extension overflow on large shift
tipc: pass tunnel dev as NULL to udp_tunnel(6)_xmit_skb
ip6_tunnel: allow not to count pkts on tstats by passing dev as NULL
ip_tunnel: allow not to count pkts on tstats by setting skb's dev to NULL
tun: wake up waitqueues after IFF_UP is set
net: remove duplicate fetch in sock_getsockopt
tipc: fix issues with early FAILOVER_MSG from peer
...
Diffstat (limited to 'net')
-rw-r--r-- | net/core/sock.c | 3 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 3 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 5 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_reasm.c | 22 | ||||
-rw-r--r-- | net/ipv6/route.c | 2 | ||||
-rw-r--r-- | net/iucv/af_iucv.c | 49 | ||||
-rw-r--r-- | net/netfilter/nft_masq.c | 3 | ||||
-rw-r--r-- | net/netfilter/nft_redir.c | 3 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 26 | ||||
-rw-r--r-- | net/tipc/link.c | 1 | ||||
-rw-r--r-- | net/tipc/node.c | 10 | ||||
-rw-r--r-- | net/tipc/udp_media.c | 8 | ||||
-rw-r--r-- | net/vmw_vsock/hyperv_transport.c | 39 |
16 files changed, 96 insertions, 95 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index af09a23e4822..aa4a00d381e3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1477,9 +1477,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname, { u32 meminfo[SK_MEMINFO_VARS]; - if (get_user(len, optlen)) - return -EFAULT; - sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 13ec7c3a9c49..7fd6db3fe366 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -752,10 +752,6 @@ drop: static void reqsk_queue_hash_req(struct request_sock *req, unsigned long timeout) { - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9e3846388fb3..1452a97914a0 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -76,9 +76,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out(net, sk, skb); - if (unlikely(net_xmit_eval(err))) - pkt_len = 0; - iptunnel_xmit_stats(dev, pkt_len); + + if (dev) { + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + iptunnel_xmit_stats(dev, pkt_len); + } } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 018a48477355..f5a45e1e1182 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -221,10 +221,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sock *child; bool own_req; - req->num_retrans = 0; - req->num_timeout = 0; - req->sk = NULL; - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, NULL, &own_req); if (!child) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 00c01a01b547..0ebc33d1c9e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1296,7 +1296,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (nsize < 0) nsize = 0; - if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) { + if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf && + tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); return -ENOMEM; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 06b3e2c1fcdc..9763464a75d7 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -224,6 +224,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, seg = segs; uh = udp_hdr(seg); + /* preserve TX timestamp flags and TS key for first segment */ + skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey; + skb_shinfo(seg)->tx_flags |= + (skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP); + /* compute checksum adjustment based on old length versus new */ newlen = htons(sizeof(*uh) + mss); check = csum16_add(csum16_sub(uh->check, uh->len), newlen); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f3abbb9e093..84322ce81d70 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -261,8 +261,14 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, prev = fq->q.fragments_tail; err = inet_frag_queue_insert(&fq->q, skb, offset, end); - if (err) + if (err) { + if (err == IPFRAG_DUP) { + /* No error for duplicates, pretend they got queued. */ + kfree_skb(skb); + return -EINPROGRESS; + } goto insert_error; + } if (dev) fq->iif = dev->ifindex; @@ -289,15 +295,17 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, skb->_skb_refdst = 0UL; err = nf_ct_frag6_reasm(fq, skb, prev, dev); skb->_skb_refdst = orefdst; - return err; + + /* After queue has assumed skb ownership, only 0 or + * -EINPROGRESS must be returned. + */ + return err ? -EINPROGRESS : 0; } skb_dst_drop(skb); return -EINPROGRESS; insert_error: - if (err == IPFRAG_DUP) - goto err; inet_frag_kill(&fq->q); err: skb_dst_drop(skb); @@ -476,12 +484,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) ret = 0; } - /* after queue has assumed skb ownership, only 0 or -EINPROGRESS - * must be returned. - */ - if (ret) - ret = -EINPROGRESS; - spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0f60eb3a2873..11ad62effd56 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3184,7 +3184,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->fib6_table = table; rt->fib6_metric = cfg->fc_metric; - rt->fib6_type = cfg->fc_type; + rt->fib6_type = cfg->fc_type ? : RTN_UNICAST; rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY; ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 36eb8d1d9128..09e1694b6d34 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> +#include <linux/netdevice.h> #include <linux/types.h> #include <linux/list.h> #include <linux/errno.h> @@ -347,14 +348,14 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, if (imsg) memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message)); - skb_push(skb, ETH_HLEN); - memset(skb->data, 0, ETH_HLEN); - skb->dev = iucv->hs_dev; if (!skb->dev) { err = -ENODEV; goto err_free; } + + dev_hard_header(skb, skb->dev, ETH_P_AF_IUCV, NULL, NULL, skb->len); + if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) { err = -ENETDOWN; goto err_free; @@ -367,6 +368,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, skb_trim(skb, skb->dev->mtu); } skb->protocol = cpu_to_be16(ETH_P_AF_IUCV); + + __skb_header_release(skb); nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) { err = -ENOMEM; @@ -466,12 +469,14 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) /* Send controlling flags through an IUCV socket for HIPER transport */ static int iucv_send_ctrl(struct sock *sk, u8 flags) { + struct iucv_sock *iucv = iucv_sk(sk); int err = 0; int blen; struct sk_buff *skb; u8 shutdown = 0; - blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + blen = sizeof(struct af_iucv_trans_hdr) + + LL_RESERVED_SPACE(iucv->hs_dev); if (sk->sk_shutdown & SEND_SHUTDOWN) { /* controlling flags should be sent anyway */ shutdown = sk->sk_shutdown; @@ -588,7 +593,6 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; - sk->sk_allocation = GFP_DMA; sock_reset_flag(sk, SOCK_ZAPPED); @@ -782,6 +786,7 @@ vm_bind: memcpy(iucv->src_user_id, iucv_userid, 8); sk->sk_state = IUCV_BOUND; iucv->transport = AF_IUCV_TRANS_IUCV; + sk->sk_allocation |= GFP_DMA; if (!iucv->msglimit) iucv->msglimit = IUCV_QUEUELEN_DEFAULT; goto done_unlock; @@ -806,6 +811,8 @@ static int iucv_sock_autobind(struct sock *sk) return -EPROTO; memcpy(iucv->src_user_id, iucv_userid, 8); + iucv->transport = AF_IUCV_TRANS_IUCV; + sk->sk_allocation |= GFP_DMA; write_lock_bh(&iucv_sk_list.lock); __iucv_auto_name(iucv); @@ -1131,7 +1138,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ if (iucv->transport == AF_IUCV_TRANS_HIPER) { - headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + headroom = sizeof(struct af_iucv_trans_hdr) + + LL_RESERVED_SPACE(iucv->hs_dev); linear = len; } else { if (len < PAGE_SIZE) { @@ -1781,6 +1789,8 @@ static int iucv_callback_connreq(struct iucv_path *path, niucv = iucv_sk(nsk); iucv_sock_init(nsk, sk); + niucv->transport = AF_IUCV_TRANS_IUCV; + nsk->sk_allocation |= GFP_DMA; /* Set the new iucv_sock */ memcpy(niucv->dst_name, ipuser + 8, 8); @@ -2430,6 +2440,13 @@ out: return err; } +static void afiucv_iucv_exit(void) +{ + device_unregister(af_iucv_dev); + driver_unregister(&af_iucv_driver); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); +} + static int __init afiucv_init(void) { int err; @@ -2463,11 +2480,18 @@ static int __init afiucv_init(void) err = afiucv_iucv_init(); if (err) goto out_sock; - } else - register_netdevice_notifier(&afiucv_netdev_notifier); + } + + err = register_netdevice_notifier(&afiucv_netdev_notifier); + if (err) + goto out_notifier; + dev_add_pack(&iucv_packet_type); return 0; +out_notifier: + if (pr_iucv) + afiucv_iucv_exit(); out_sock: sock_unregister(PF_IUCV); out_proto: @@ -2481,12 +2505,11 @@ out: static void __exit afiucv_exit(void) { if (pr_iucv) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); - pr_iucv->iucv_unregister(&af_iucv_handler, 0); + afiucv_iucv_exit(); symbol_put(iucv_if); - } else - unregister_netdevice_notifier(&afiucv_netdev_notifier); + } + + unregister_netdevice_notifier(&afiucv_netdev_notifier); dev_remove_pack(&iucv_packet_type); sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 922d47081080..39dc94f2491e 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -304,5 +304,4 @@ module_exit(nft_masq_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq"); +MODULE_ALIAS_NFT_EXPR("masq"); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 96930b429d5f..8487eeff5c0e 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -291,5 +291,4 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); +MODULE_ALIAS_NFT_EXPR("nat"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 956ff3da81f4..b17f2ed970e2 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -439,8 +439,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_skb_cb *cb; struct sk_buff *skb2; struct sk_buff *segs = NULL; - unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); - int nb = 0; + unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; int rc = NET_XMIT_SUCCESS; int rc_drop = NET_XMIT_DROP; @@ -494,16 +493,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (skb_is_gso(skb)) { - segs = netem_segment(skb, sch, to_free); - if (!segs) + skb = netem_segment(skb, sch, to_free); + if (!skb) return rc_drop; - } else { - segs = skb; + segs = skb->next; + skb_mark_not_on_list(skb); + qdisc_skb_cb(skb)->pkt_len = skb->len; } - skb = segs; - segs = segs->next; - skb = skb_unshare(skb, GFP_ATOMIC); if (unlikely(!skb)) { qdisc_qstats_drop(sch); @@ -520,6 +517,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } if (unlikely(sch->q.qlen >= sch->limit)) { + /* re-link segs, so that qdisc_drop_all() frees them all */ + skb->next = segs; qdisc_drop_all(skb, sch, to_free); return rc_drop; } @@ -593,6 +592,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, finish_segs: if (segs) { + unsigned int len, last_len; + int nb = 0; + + len = skb->len; + while (segs) { skb2 = segs->next; skb_mark_not_on_list(segs); @@ -608,9 +612,7 @@ finish_segs: } segs = skb2; } - sch->q.qlen += nb; - if (nb > 1) - qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + qdisc_tree_reduce_backlog(sch, -nb, prev_len - len); } return NET_XMIT_SUCCESS; } diff --git a/net/tipc/link.c b/net/tipc/link.c index f5cd986e1e50..2050fd386642 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1728,7 +1728,6 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes * would have to start over from scratch instead. */ - WARN_ON(l && tipc_link_is_up(l)); tnl->drop_point = 1; tnl->failover_reasm_skb = NULL; diff --git a/net/tipc/node.c b/net/tipc/node.c index 9e106d3ed187..550581d47d51 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -766,9 +766,9 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, * disturbance, wrong session, etc.) * 3. Link <1B-2B> up * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) - * 5. Node B starts failover onto link <1B-2B> + * 5. Node 2 starts failover onto link <1B-2B> * - * ==> Node A does never start link/node failover! + * ==> Node 1 does never start link/node failover! * * @n: tipc node structure * @l: link peer endpoint failingover (- can be NULL) @@ -783,6 +783,10 @@ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, if (!tipc_link_is_up(tnl)) return; + /* Don't rush, failure link may be in the process of resetting */ + if (l && !tipc_link_is_reset(l)) + return; + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); @@ -1706,7 +1710,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate or update failover mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; - if (pl && tipc_link_is_up(pl)) { + if (pl && !tipc_link_is_reset(pl)) { __tipc_node_link_down(n, &pb_id, xmitq, &maddr); trace_tipc_node_link_down(n, true, "node link down <- failover!"); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 7fc02d84c4f1..1405ccc9101c 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -176,7 +176,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, goto tx_error; } - skb->dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->port, @@ -195,10 +194,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, - ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, 0, src->port, - dst->port, false); + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false); #endif } return err; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index fb2df6e068fa..62dcdf082349 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -211,18 +211,6 @@ static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) set_channel_pending_send_size(chan, HVS_PKT_LEN(HVS_SEND_BUF_SIZE)); - /* See hvs_stream_has_space(): we must make sure the host has seen - * the new pending send size, before we can re-check the writable - * bytes. - */ - virt_mb(); -} - -static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan) -{ - set_channel_pending_send_size(chan, 0); - - /* Ditto */ virt_mb(); } @@ -292,9 +280,6 @@ static void hvs_channel_cb(void *ctx) if (hvs_channel_readable(chan)) sk->sk_data_ready(sk); - /* See hvs_stream_has_space(): when we reach here, the writable bytes - * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE). - */ if (hv_get_bytes_to_write(&chan->outbound) > 0) sk->sk_write_space(sk); } @@ -395,6 +380,13 @@ static void hvs_open_connection(struct vmbus_channel *chan) set_per_channel_state(chan, conn_from_host ? new : sk); vmbus_set_chn_rescind_callback(chan, hvs_close_connection); + /* Set the pending send size to max packet size to always get + * notifications from the host when there is enough writable space. + * The host is optimized to send notifications only when the pending + * size boundary is crossed, and not always. + */ + hvs_set_channel_pending_send_size(chan); + if (conn_from_host) { new->sk_state = TCP_ESTABLISHED; sk->sk_ack_backlog++; @@ -688,23 +680,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) static s64 hvs_stream_has_space(struct vsock_sock *vsk) { struct hvsock *hvs = vsk->trans; - struct vmbus_channel *chan = hvs->chan; - s64 ret; - - ret = hvs_channel_writable_bytes(chan); - if (ret > 0) { - hvs_clear_channel_pending_send_size(chan); - } else { - /* See hvs_channel_cb() */ - hvs_set_channel_pending_send_size(chan); - - /* Re-check the writable bytes to avoid race */ - ret = hvs_channel_writable_bytes(chan); - if (ret > 0) - hvs_clear_channel_pending_send_size(chan); - } - return ret; + return hvs_channel_writable_bytes(hvs->chan); } static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk) |