From 7c4a54b963b68eee5ef3bd7ca740630d965616e2 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 1 Sep 2016 13:52:50 -0400 Subject: tipc: rate limit broadcast retransmissions As cluster sizes grow, so does the amount of identical or overlapping broadcast NACKs generated by the packet receivers. This often leads to 'NACK crunches' resulting in huge numbers of redundant retransmissions of the same packet ranges. In this commit, we introduce rate control of broadcast retransmissions, so that a retransmitted range cannot be retransmitted again until after at least 10 ms. This reduces the frequency of duplicate, redundant retransmissions by an order of magnitude, while having a significant positive impact on overall throughput and scalability. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 136316fb37ec..58bb44d95f95 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -181,7 +181,10 @@ struct tipc_link { u16 acked; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; - int nack_state; + unsigned long prev_retr; + u16 prev_from; + u16 prev_to; + u8 nack_state; bool bc_peer_is_up; /* Statistics */ @@ -202,6 +205,8 @@ enum { BC_NACK_SND_SUPPRESS, }; +#define TIPC_BC_RETR_LIMIT 10 /* [ms] */ + /* * Interval between NACKs when packets arrive out of order */ @@ -1590,11 +1595,48 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) l->rcv_nxt = peers_snd_nxt; } +/* link_bc_retr eval()- check if the indicated range can be retransmitted now + * - Adjust permitted range if there is overlap with previous retransmission + */ +static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to) +{ + unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr); + + if (less(*to, *from)) + return false; + + /* New retransmission request */ + if ((elapsed > TIPC_BC_RETR_LIMIT) || + less(*to, l->prev_from) || more(*from, l->prev_to)) { + l->prev_from = *from; + l->prev_to = *to; + l->prev_retr = jiffies; + return true; + } + + /* Inside range of previous retransmit */ + if (!less(*from, l->prev_from) && !more(*to, l->prev_to)) + return false; + + /* Fully or partially outside previous range => exclude overlap */ + if (less(*from, l->prev_from)) { + *to = l->prev_from - 1; + l->prev_from = *from; + } + if (more(*to, l->prev_to)) { + *from = l->prev_to + 1; + l->prev_to = *to; + } + l->prev_retr = jiffies; + return true; +} + /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq) { + struct tipc_link *snd_l = l->bc_sndlink; u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); u16 from = msg_bcast_ack(hdr) + 1; u16 to = from + msg_bc_gap(hdr) - 1; @@ -1613,14 +1655,14 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, if (!l->bc_peer_is_up) return rc; + l->stats.recv_nacks++; + /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; - if (!less(to, from)) { - rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); - l->stats.recv_nacks++; - } + if (link_bc_retr_eval(snd_l, &from, &to)) + rc = tipc_link_retrans(snd_l, from, to, xmitq); l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) -- cgit v1.2.3