diff options
author | Arnaldo Carvalho de Melo <acme@mandriva.com> | 2005-08-10 05:45:03 +0200 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-30 00:55:58 +0200 |
commit | 696ab2d3bffc746fb8cf3712f066d42b9886aeed (patch) | |
tree | d0990b1d6f5fd6b3b7ddce553a16cccf6f029651 /net/ipv4/tcp_minisocks.c | |
parent | [TIMEWAIT]: Introduce inet_timewait_death_row (diff) | |
download | linux-696ab2d3bffc746fb8cf3712f066d42b9886aeed.tar.xz linux-696ab2d3bffc746fb8cf3712f066d42b9886aeed.zip |
[TIMEWAIT]: Move inet_timewait_death_row routines to net/ipv4/inet_timewait_sock.c
Also export the ones that will be used in the next changeset, when
DCCP uses this infrastructure.
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_minisocks.c')
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 281 |
1 files changed, 10 insertions, 271 deletions
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 81b9a52c50c6..dc085233d512 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -35,12 +35,6 @@ #define SYNC_INIT 1 #endif -/* New-style handling of TIME_WAIT sockets. */ - -static void inet_twdr_hangman(unsigned long data); -static void inet_twdr_twkill_work(void *data); -static void inet_twdr_twcal_tick(unsigned long data); - int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; @@ -63,10 +57,6 @@ struct inet_timewait_death_row tcp_death_row = { EXPORT_SYMBOL_GPL(tcp_death_row); -static void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo); - static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -173,9 +163,11 @@ kill_with_rst: if (tw->tw_family == AF_INET && tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout); + inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, + TCP_TIMEWAIT_LEN); else - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, + TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -213,7 +205,8 @@ kill: return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, + TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -263,7 +256,8 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, + TCP_TIMEWAIT_LEN); /* Send ACK. Note, we do not put the bucket, * it will be released by caller. @@ -326,7 +320,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - inet_twsk_schedule(tw, &tcp_death_row, timeo); + inet_twsk_schedule(tw, &tcp_death_row, timeo, + TCP_TIMEWAIT_LEN); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -341,261 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcp_done(sk); } -/* Returns non-zero if quota exceeded. */ -static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, - const int slot) -{ - struct inet_timewait_sock *tw; - struct hlist_node *node; - unsigned int killed; - int ret; - - /* NOTE: compare this to previous version where lock - * was released after detaching chain. It was racy, - * because tw buckets are scheduled in not serialized context - * in 2.3 (with netfilter), and with softnet it is common, because - * soft irqs are not sequenced. - */ - killed = 0; - ret = 0; -rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { - __inet_twsk_del_dead_node(tw); - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); - inet_twsk_put(tw); - killed++; - spin_lock(&twdr->death_lock); - if (killed > INET_TWDR_TWKILL_QUOTA) { - ret = 1; - break; - } - - /* While we dropped twdr->death_lock, another cpu may have - * killed off the next TW bucket in the list, therefore - * do a fresh re-read of the hlist head node with the - * lock reacquired. We still use the hlist traversal - * macro in order to get the prefetches. - */ - goto rescan; - } - - twdr->tw_count -= killed; - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); - - return ret; -} - -static void inet_twdr_hangman(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int unsigned need_timer; - - twdr = (struct inet_timewait_death_row *)data; - spin_lock(&twdr->death_lock); - - if (twdr->tw_count == 0) - goto out; - - need_timer = 0; - if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { - twdr->thread_slots |= (1 << twdr->slot); - mb(); - schedule_work(&twdr->twkill_work); - need_timer = 1; - } else { - /* We purged the entire slot, anything left? */ - if (twdr->tw_count) - need_timer = 1; - } - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); - if (need_timer) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); -out: - spin_unlock(&twdr->death_lock); -} - -extern void twkill_slots_invalid(void); - -static void inet_twdr_twkill_work(void *data) -{ - struct inet_timewait_death_row *twdr = data; - int i; - - if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) - twkill_slots_invalid(); - - while (twdr->thread_slots) { - spin_lock_bh(&twdr->death_lock); - for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { - if (!(twdr->thread_slots & (1 << i))) - continue; - - while (inet_twdr_do_twkill_work(twdr, i) != 0) { - if (need_resched()) { - spin_unlock_bh(&twdr->death_lock); - schedule(); - spin_lock_bh(&twdr->death_lock); - } - } - - twdr->thread_slots &= ~(1 << i); - } - spin_unlock_bh(&twdr->death_lock); - } -} - -/* These are always called from BH context. See callers in - * tcp_input.c to verify this. - */ - -/* This is for handling early-kills of TIME_WAIT sockets. */ -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr) -{ - spin_lock(&twdr->death_lock); - if (inet_twsk_del_dead_node(tw)) { - inet_twsk_put(tw); - if (--twdr->tw_count == 0) - del_timer(&twdr->tw_timer); - } - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); -} - -static void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo) -{ - struct hlist_head *list; - int slot; - - /* timeout := RTO * 3.5 - * - * 3.5 = 1+2+0.5 to wait for two retransmits. - * - * RATIONALE: if FIN arrived and we entered TIME-WAIT state, - * our ACK acking that FIN can be lost. If N subsequent retransmitted - * FINs (or previous seqments) are lost (probability of such event - * is p^(N+1), where p is probability to lose single packet and - * time to detect the loss is about RTO*(2^N - 1) with exponential - * backoff). Normal timewait length is calculated so, that we - * waited at least for one retransmitted FIN (maximal RTO is 120sec). - * [ BTW Linux. following BSD, violates this requirement waiting - * only for 60sec, we should wait at least for 240 secs. - * Well, 240 consumes too much of resources 8) - * ] - * This interval is not reduced to catch old duplicate and - * responces to our wandering segments living for two MSLs. - * However, if we use PAWS to detect - * old duplicates, we can reduce the interval to bounds required - * by RTO, rather than MSL. So, if peer understands PAWS, we - * kill tw bucket after 3.5*RTO (it is important that this number - * is greater than TS tick!) and detect old duplicates with help - * of PAWS. - */ - slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - - spin_lock(&twdr->death_lock); - - /* Unlink it, if it was scheduled */ - if (inet_twsk_del_dead_node(tw)) - twdr->tw_count--; - else - atomic_inc(&tw->tw_refcnt); - - if (slot >= INET_TWDR_RECYCLE_SLOTS) { - /* Schedule to slow timer */ - if (timeo >= TCP_TIMEWAIT_LEN) { - slot = INET_TWDR_TWKILL_SLOTS - 1; - } else { - slot = (timeo + twdr->period - 1) / twdr->period; - if (slot >= INET_TWDR_TWKILL_SLOTS) - slot = INET_TWDR_TWKILL_SLOTS - 1; - } - tw->tw_ttd = jiffies + timeo; - slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); - list = &twdr->cells[slot]; - } else { - tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); - - if (twdr->twcal_hand < 0) { - twdr->twcal_hand = 0; - twdr->twcal_jiffie = jiffies; - twdr->twcal_timer.expires = twdr->twcal_jiffie + - (slot << INET_TWDR_RECYCLE_TICK); - add_timer(&twdr->twcal_timer); - } else { - if (time_after(twdr->twcal_timer.expires, - jiffies + (slot << INET_TWDR_RECYCLE_TICK))) - mod_timer(&twdr->twcal_timer, - jiffies + (slot << INET_TWDR_RECYCLE_TICK)); - slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - list = &twdr->twcal_row[slot]; - } - - hlist_add_head(&tw->tw_death_node, list); - - if (twdr->tw_count++ == 0) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); - spin_unlock(&twdr->death_lock); -} - -void inet_twdr_twcal_tick(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int n, slot; - unsigned long j; - unsigned long now = jiffies; - int killed = 0; - int adv = 0; - - twdr = (struct inet_timewait_death_row *)data; - - spin_lock(&twdr->death_lock); - if (twdr->twcal_hand < 0) - goto out; - - slot = twdr->twcal_hand; - j = twdr->twcal_jiffie; - - for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { - if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; - struct inet_timewait_sock *tw; - - inet_twsk_for_each_inmate_safe(tw, node, safe, - &twdr->twcal_row[slot]) { - __inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, twdr->hashinfo); - inet_twsk_put(tw); - killed++; - } - } else { - if (!adv) { - adv = 1; - twdr->twcal_jiffie = j; - twdr->twcal_hand = slot; - } - - if (!hlist_empty(&twdr->twcal_row[slot])) { - mod_timer(&twdr->twcal_timer, j); - goto out; - } - } - j += 1 << INET_TWDR_RECYCLE_TICK; - slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - twdr->twcal_hand = -1; - -out: - if ((twdr->tw_count -= killed) == 0) - del_timer(&twdr->tw_timer); - NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); - spin_unlock(&twdr->death_lock); -} - /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -933,4 +673,3 @@ EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); EXPORT_SYMBOL(tcp_create_openreq_child); EXPORT_SYMBOL(tcp_timewait_state_process); -EXPORT_SYMBOL(inet_twsk_deschedule); |