1e48c414eSArnaldo Carvalho de Melo /* 2e48c414eSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 3e48c414eSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 4e48c414eSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 5e48c414eSArnaldo Carvalho de Melo * 6e48c414eSArnaldo Carvalho de Melo * Generic TIME_WAIT sockets functions 7e48c414eSArnaldo Carvalho de Melo * 8e48c414eSArnaldo Carvalho de Melo * From code orinally in TCP 9e48c414eSArnaldo Carvalho de Melo */ 10e48c414eSArnaldo Carvalho de Melo 11172589ccSIlpo Järvinen #include <linux/kernel.h> 12e48c414eSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 13e48c414eSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 14696ab2d3SArnaldo Carvalho de Melo #include <net/ip.h> 15e48c414eSArnaldo Carvalho de Melo 16e48c414eSArnaldo Carvalho de Melo /* Must be called with locally disabled BHs. */ 17acd159b6SAdrian Bunk static void __inet_twsk_kill(struct inet_timewait_sock *tw, 18acd159b6SAdrian Bunk struct inet_hashinfo *hashinfo) 19e48c414eSArnaldo Carvalho de Melo { 20e48c414eSArnaldo Carvalho de Melo struct inet_bind_hashbucket *bhead; 21e48c414eSArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 22e48c414eSArnaldo Carvalho de Melo /* Unlink from established hashes. */ 239db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 24e48c414eSArnaldo Carvalho de Melo 259db66bdcSEric Dumazet spin_lock(lock); 263ab5aee7SEric Dumazet if (hlist_nulls_unhashed(&tw->tw_node)) { 279db66bdcSEric Dumazet spin_unlock(lock); 28e48c414eSArnaldo Carvalho de Melo return; 29e48c414eSArnaldo Carvalho de Melo } 303ab5aee7SEric Dumazet hlist_nulls_del_rcu(&tw->tw_node); 313ab5aee7SEric Dumazet sk_nulls_node_init(&tw->tw_node); 329db66bdcSEric Dumazet spin_unlock(lock); 33e48c414eSArnaldo Carvalho de Melo 34e48c414eSArnaldo Carvalho de Melo /* Disassociate with bind bucket. */ 357f635ab7SPavel Emelyanov bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, 367f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 37e48c414eSArnaldo Carvalho de Melo spin_lock(&bhead->lock); 38e48c414eSArnaldo Carvalho de Melo tb = tw->tw_tb; 39e48c414eSArnaldo Carvalho de Melo __hlist_del(&tw->tw_bind_node); 40e48c414eSArnaldo Carvalho de Melo tw->tw_tb = NULL; 41e48c414eSArnaldo Carvalho de Melo inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 42e48c414eSArnaldo Carvalho de Melo spin_unlock(&bhead->lock); 43e48c414eSArnaldo Carvalho de Melo #ifdef SOCK_REFCNT_DEBUG 44e48c414eSArnaldo Carvalho de Melo if (atomic_read(&tw->tw_refcnt) != 1) { 45e48c414eSArnaldo Carvalho de Melo printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", 46e48c414eSArnaldo Carvalho de Melo tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 47e48c414eSArnaldo Carvalho de Melo } 48e48c414eSArnaldo Carvalho de Melo #endif 49e48c414eSArnaldo Carvalho de Melo inet_twsk_put(tw); 50e48c414eSArnaldo Carvalho de Melo } 51e48c414eSArnaldo Carvalho de Melo 52*4dbc8ef7SArnaldo Carvalho de Melo static noinline void inet_twsk_free(struct inet_timewait_sock *tw) 537054fb93SPavel Emelyanov { 547054fb93SPavel Emelyanov struct module *owner = tw->tw_prot->owner; 557054fb93SPavel Emelyanov twsk_destructor((struct sock *)tw); 567054fb93SPavel Emelyanov #ifdef SOCK_REFCNT_DEBUG 57*4dbc8ef7SArnaldo Carvalho de Melo pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); 587054fb93SPavel Emelyanov #endif 59cd5342d9SDenis V. Lunev release_net(twsk_net(tw)); 607054fb93SPavel Emelyanov kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); 617054fb93SPavel Emelyanov module_put(owner); 627054fb93SPavel Emelyanov } 63*4dbc8ef7SArnaldo Carvalho de Melo 64*4dbc8ef7SArnaldo Carvalho de Melo void inet_twsk_put(struct inet_timewait_sock *tw) 65*4dbc8ef7SArnaldo Carvalho de Melo { 66*4dbc8ef7SArnaldo Carvalho de Melo if (atomic_dec_and_test(&tw->tw_refcnt)) 67*4dbc8ef7SArnaldo Carvalho de Melo inet_twsk_free(tw); 687054fb93SPavel Emelyanov } 697054fb93SPavel Emelyanov EXPORT_SYMBOL_GPL(inet_twsk_put); 707054fb93SPavel Emelyanov 71e48c414eSArnaldo Carvalho de Melo /* 72e48c414eSArnaldo Carvalho de Melo * Enter the time wait state. This is called with locally disabled BH. 73e48c414eSArnaldo Carvalho de Melo * Essentially we whip up a timewait bucket, copy the relevant info into it 74e48c414eSArnaldo Carvalho de Melo * from the SK, and mess with hash chains and list linkage. 75e48c414eSArnaldo Carvalho de Melo */ 76e48c414eSArnaldo Carvalho de Melo void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, 77e48c414eSArnaldo Carvalho de Melo struct inet_hashinfo *hashinfo) 78e48c414eSArnaldo Carvalho de Melo { 79e48c414eSArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 80463c84b9SArnaldo Carvalho de Melo const struct inet_connection_sock *icsk = inet_csk(sk); 8181c3d547SEric Dumazet struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); 829db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 83e48c414eSArnaldo Carvalho de Melo struct inet_bind_hashbucket *bhead; 84e48c414eSArnaldo Carvalho de Melo /* Step 1: Put TW into bind hash. Original socket stays there too. 85e48c414eSArnaldo Carvalho de Melo Note, that any socket with inet->num != 0 MUST be bound in 86e48c414eSArnaldo Carvalho de Melo binding cache, even if it is closed. 87e48c414eSArnaldo Carvalho de Melo */ 887f635ab7SPavel Emelyanov bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, 897f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 90e48c414eSArnaldo Carvalho de Melo spin_lock(&bhead->lock); 91463c84b9SArnaldo Carvalho de Melo tw->tw_tb = icsk->icsk_bind_hash; 92547b792cSIlpo Järvinen WARN_ON(!icsk->icsk_bind_hash); 93e48c414eSArnaldo Carvalho de Melo inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 94e48c414eSArnaldo Carvalho de Melo spin_unlock(&bhead->lock); 95e48c414eSArnaldo Carvalho de Melo 969db66bdcSEric Dumazet spin_lock(lock); 97e48c414eSArnaldo Carvalho de Melo 983ab5aee7SEric Dumazet /* 993ab5aee7SEric Dumazet * Step 2: Hash TW into TIMEWAIT chain. 1003ab5aee7SEric Dumazet * Should be done before removing sk from established chain 1013ab5aee7SEric Dumazet * because readers are lockless and search established first. 1023ab5aee7SEric Dumazet */ 103e48c414eSArnaldo Carvalho de Melo atomic_inc(&tw->tw_refcnt); 1043ab5aee7SEric Dumazet inet_twsk_add_node_rcu(tw, &ehead->twchain); 1053ab5aee7SEric Dumazet 1063ab5aee7SEric Dumazet /* Step 3: Remove SK from established hash. */ 1073ab5aee7SEric Dumazet if (__sk_nulls_del_node_init_rcu(sk)) 1083ab5aee7SEric Dumazet sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 109e48c414eSArnaldo Carvalho de Melo 1109db66bdcSEric Dumazet spin_unlock(lock); 111e48c414eSArnaldo Carvalho de Melo } 112c676270bSArnaldo Carvalho de Melo 113696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 114696ab2d3SArnaldo Carvalho de Melo 115c676270bSArnaldo Carvalho de Melo struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) 116c676270bSArnaldo Carvalho de Melo { 1176d6ee43eSArnaldo Carvalho de Melo struct inet_timewait_sock *tw = 1186d6ee43eSArnaldo Carvalho de Melo kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, 11954e6ecb2SChristoph Lameter GFP_ATOMIC); 120c676270bSArnaldo Carvalho de Melo if (tw != NULL) { 121c676270bSArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 122c676270bSArnaldo Carvalho de Melo 123c676270bSArnaldo Carvalho de Melo /* Give us an identity. */ 124c676270bSArnaldo Carvalho de Melo tw->tw_daddr = inet->daddr; 125c676270bSArnaldo Carvalho de Melo tw->tw_rcv_saddr = inet->rcv_saddr; 126c676270bSArnaldo Carvalho de Melo tw->tw_bound_dev_if = sk->sk_bound_dev_if; 127c676270bSArnaldo Carvalho de Melo tw->tw_num = inet->num; 128c676270bSArnaldo Carvalho de Melo tw->tw_state = TCP_TIME_WAIT; 129c676270bSArnaldo Carvalho de Melo tw->tw_substate = state; 130c676270bSArnaldo Carvalho de Melo tw->tw_sport = inet->sport; 131c676270bSArnaldo Carvalho de Melo tw->tw_dport = inet->dport; 132c676270bSArnaldo Carvalho de Melo tw->tw_family = sk->sk_family; 133c676270bSArnaldo Carvalho de Melo tw->tw_reuse = sk->sk_reuse; 13481c3d547SEric Dumazet tw->tw_hash = sk->sk_hash; 135c676270bSArnaldo Carvalho de Melo tw->tw_ipv6only = 0; 136f5715aeaSKOVACS Krisztian tw->tw_transparent = inet->transparent; 137c676270bSArnaldo Carvalho de Melo tw->tw_prot = sk->sk_prot_creator; 138cd5342d9SDenis V. Lunev twsk_net_set(tw, hold_net(sock_net(sk))); 139c676270bSArnaldo Carvalho de Melo atomic_set(&tw->tw_refcnt, 1); 140c676270bSArnaldo Carvalho de Melo inet_twsk_dead_node_init(tw); 141eeb2b856SArnaldo Carvalho de Melo __module_get(tw->tw_prot->owner); 142c676270bSArnaldo Carvalho de Melo } 143c676270bSArnaldo Carvalho de Melo 144c676270bSArnaldo Carvalho de Melo return tw; 145c676270bSArnaldo Carvalho de Melo } 146696ab2d3SArnaldo Carvalho de Melo 147696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twsk_alloc); 148696ab2d3SArnaldo Carvalho de Melo 149696ab2d3SArnaldo Carvalho de Melo /* Returns non-zero if quota exceeded. */ 150696ab2d3SArnaldo Carvalho de Melo static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, 151696ab2d3SArnaldo Carvalho de Melo const int slot) 152696ab2d3SArnaldo Carvalho de Melo { 153696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 154696ab2d3SArnaldo Carvalho de Melo struct hlist_node *node; 155696ab2d3SArnaldo Carvalho de Melo unsigned int killed; 156696ab2d3SArnaldo Carvalho de Melo int ret; 157696ab2d3SArnaldo Carvalho de Melo 158696ab2d3SArnaldo Carvalho de Melo /* NOTE: compare this to previous version where lock 159696ab2d3SArnaldo Carvalho de Melo * was released after detaching chain. It was racy, 160696ab2d3SArnaldo Carvalho de Melo * because tw buckets are scheduled in not serialized context 161696ab2d3SArnaldo Carvalho de Melo * in 2.3 (with netfilter), and with softnet it is common, because 162696ab2d3SArnaldo Carvalho de Melo * soft irqs are not sequenced. 163696ab2d3SArnaldo Carvalho de Melo */ 164696ab2d3SArnaldo Carvalho de Melo killed = 0; 165696ab2d3SArnaldo Carvalho de Melo ret = 0; 166696ab2d3SArnaldo Carvalho de Melo rescan: 167696ab2d3SArnaldo Carvalho de Melo inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { 168696ab2d3SArnaldo Carvalho de Melo __inet_twsk_del_dead_node(tw); 169696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 170696ab2d3SArnaldo Carvalho de Melo __inet_twsk_kill(tw, twdr->hashinfo); 171f2bf415cSPavel Emelyanov #ifdef CONFIG_NET_NS 172f2bf415cSPavel Emelyanov NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); 173f2bf415cSPavel Emelyanov #endif 174696ab2d3SArnaldo Carvalho de Melo inet_twsk_put(tw); 175696ab2d3SArnaldo Carvalho de Melo killed++; 176696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 177696ab2d3SArnaldo Carvalho de Melo if (killed > INET_TWDR_TWKILL_QUOTA) { 178696ab2d3SArnaldo Carvalho de Melo ret = 1; 179696ab2d3SArnaldo Carvalho de Melo break; 180696ab2d3SArnaldo Carvalho de Melo } 181696ab2d3SArnaldo Carvalho de Melo 182696ab2d3SArnaldo Carvalho de Melo /* While we dropped twdr->death_lock, another cpu may have 183696ab2d3SArnaldo Carvalho de Melo * killed off the next TW bucket in the list, therefore 184696ab2d3SArnaldo Carvalho de Melo * do a fresh re-read of the hlist head node with the 185696ab2d3SArnaldo Carvalho de Melo * lock reacquired. We still use the hlist traversal 186696ab2d3SArnaldo Carvalho de Melo * macro in order to get the prefetches. 187696ab2d3SArnaldo Carvalho de Melo */ 188696ab2d3SArnaldo Carvalho de Melo goto rescan; 189696ab2d3SArnaldo Carvalho de Melo } 190696ab2d3SArnaldo Carvalho de Melo 191696ab2d3SArnaldo Carvalho de Melo twdr->tw_count -= killed; 192f2bf415cSPavel Emelyanov #ifndef CONFIG_NET_NS 193f2bf415cSPavel Emelyanov NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); 194f2bf415cSPavel Emelyanov #endif 195696ab2d3SArnaldo Carvalho de Melo return ret; 196696ab2d3SArnaldo Carvalho de Melo } 197696ab2d3SArnaldo Carvalho de Melo 198696ab2d3SArnaldo Carvalho de Melo void inet_twdr_hangman(unsigned long data) 199696ab2d3SArnaldo Carvalho de Melo { 200696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr; 201696ab2d3SArnaldo Carvalho de Melo int unsigned need_timer; 202696ab2d3SArnaldo Carvalho de Melo 203696ab2d3SArnaldo Carvalho de Melo twdr = (struct inet_timewait_death_row *)data; 204696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 205696ab2d3SArnaldo Carvalho de Melo 206696ab2d3SArnaldo Carvalho de Melo if (twdr->tw_count == 0) 207696ab2d3SArnaldo Carvalho de Melo goto out; 208696ab2d3SArnaldo Carvalho de Melo 209696ab2d3SArnaldo Carvalho de Melo need_timer = 0; 210696ab2d3SArnaldo Carvalho de Melo if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { 211696ab2d3SArnaldo Carvalho de Melo twdr->thread_slots |= (1 << twdr->slot); 212696ab2d3SArnaldo Carvalho de Melo schedule_work(&twdr->twkill_work); 213696ab2d3SArnaldo Carvalho de Melo need_timer = 1; 214696ab2d3SArnaldo Carvalho de Melo } else { 215696ab2d3SArnaldo Carvalho de Melo /* We purged the entire slot, anything left? */ 216696ab2d3SArnaldo Carvalho de Melo if (twdr->tw_count) 217696ab2d3SArnaldo Carvalho de Melo need_timer = 1; 218696ab2d3SArnaldo Carvalho de Melo } 219696ab2d3SArnaldo Carvalho de Melo twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); 220696ab2d3SArnaldo Carvalho de Melo if (need_timer) 221696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->tw_timer, jiffies + twdr->period); 222696ab2d3SArnaldo Carvalho de Melo out: 223696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 224696ab2d3SArnaldo Carvalho de Melo } 225696ab2d3SArnaldo Carvalho de Melo 226696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twdr_hangman); 227696ab2d3SArnaldo Carvalho de Melo 22865f27f38SDavid Howells void inet_twdr_twkill_work(struct work_struct *work) 229696ab2d3SArnaldo Carvalho de Melo { 23065f27f38SDavid Howells struct inet_timewait_death_row *twdr = 23165f27f38SDavid Howells container_of(work, struct inet_timewait_death_row, twkill_work); 232696ab2d3SArnaldo Carvalho de Melo int i; 233696ab2d3SArnaldo Carvalho de Melo 23495c9382aSPavel Emelyanov BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) > 23595c9382aSPavel Emelyanov (sizeof(twdr->thread_slots) * 8)); 236696ab2d3SArnaldo Carvalho de Melo 237696ab2d3SArnaldo Carvalho de Melo while (twdr->thread_slots) { 238696ab2d3SArnaldo Carvalho de Melo spin_lock_bh(&twdr->death_lock); 239696ab2d3SArnaldo Carvalho de Melo for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { 240696ab2d3SArnaldo Carvalho de Melo if (!(twdr->thread_slots & (1 << i))) 241696ab2d3SArnaldo Carvalho de Melo continue; 242696ab2d3SArnaldo Carvalho de Melo 243696ab2d3SArnaldo Carvalho de Melo while (inet_twdr_do_twkill_work(twdr, i) != 0) { 244696ab2d3SArnaldo Carvalho de Melo if (need_resched()) { 245696ab2d3SArnaldo Carvalho de Melo spin_unlock_bh(&twdr->death_lock); 246696ab2d3SArnaldo Carvalho de Melo schedule(); 247696ab2d3SArnaldo Carvalho de Melo spin_lock_bh(&twdr->death_lock); 248696ab2d3SArnaldo Carvalho de Melo } 249696ab2d3SArnaldo Carvalho de Melo } 250696ab2d3SArnaldo Carvalho de Melo 251696ab2d3SArnaldo Carvalho de Melo twdr->thread_slots &= ~(1 << i); 252696ab2d3SArnaldo Carvalho de Melo } 253696ab2d3SArnaldo Carvalho de Melo spin_unlock_bh(&twdr->death_lock); 254696ab2d3SArnaldo Carvalho de Melo } 255696ab2d3SArnaldo Carvalho de Melo } 256696ab2d3SArnaldo Carvalho de Melo 257696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); 258696ab2d3SArnaldo Carvalho de Melo 259696ab2d3SArnaldo Carvalho de Melo /* These are always called from BH context. See callers in 260696ab2d3SArnaldo Carvalho de Melo * tcp_input.c to verify this. 261696ab2d3SArnaldo Carvalho de Melo */ 262696ab2d3SArnaldo Carvalho de Melo 263696ab2d3SArnaldo Carvalho de Melo /* This is for handling early-kills of TIME_WAIT sockets. */ 264696ab2d3SArnaldo Carvalho de Melo void inet_twsk_deschedule(struct inet_timewait_sock *tw, 265696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr) 266696ab2d3SArnaldo Carvalho de Melo { 267696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 268696ab2d3SArnaldo Carvalho de Melo if (inet_twsk_del_dead_node(tw)) { 269696ab2d3SArnaldo Carvalho de Melo inet_twsk_put(tw); 270696ab2d3SArnaldo Carvalho de Melo if (--twdr->tw_count == 0) 271696ab2d3SArnaldo Carvalho de Melo del_timer(&twdr->tw_timer); 272696ab2d3SArnaldo Carvalho de Melo } 273696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 274696ab2d3SArnaldo Carvalho de Melo __inet_twsk_kill(tw, twdr->hashinfo); 275696ab2d3SArnaldo Carvalho de Melo } 276696ab2d3SArnaldo Carvalho de Melo 277696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_twsk_deschedule); 278696ab2d3SArnaldo Carvalho de Melo 279696ab2d3SArnaldo Carvalho de Melo void inet_twsk_schedule(struct inet_timewait_sock *tw, 280696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr, 281696ab2d3SArnaldo Carvalho de Melo const int timeo, const int timewait_len) 282696ab2d3SArnaldo Carvalho de Melo { 283696ab2d3SArnaldo Carvalho de Melo struct hlist_head *list; 284696ab2d3SArnaldo Carvalho de Melo int slot; 285696ab2d3SArnaldo Carvalho de Melo 286696ab2d3SArnaldo Carvalho de Melo /* timeout := RTO * 3.5 287696ab2d3SArnaldo Carvalho de Melo * 288696ab2d3SArnaldo Carvalho de Melo * 3.5 = 1+2+0.5 to wait for two retransmits. 289696ab2d3SArnaldo Carvalho de Melo * 290696ab2d3SArnaldo Carvalho de Melo * RATIONALE: if FIN arrived and we entered TIME-WAIT state, 291696ab2d3SArnaldo Carvalho de Melo * our ACK acking that FIN can be lost. If N subsequent retransmitted 292696ab2d3SArnaldo Carvalho de Melo * FINs (or previous seqments) are lost (probability of such event 293696ab2d3SArnaldo Carvalho de Melo * is p^(N+1), where p is probability to lose single packet and 294696ab2d3SArnaldo Carvalho de Melo * time to detect the loss is about RTO*(2^N - 1) with exponential 295696ab2d3SArnaldo Carvalho de Melo * backoff). Normal timewait length is calculated so, that we 296696ab2d3SArnaldo Carvalho de Melo * waited at least for one retransmitted FIN (maximal RTO is 120sec). 297696ab2d3SArnaldo Carvalho de Melo * [ BTW Linux. following BSD, violates this requirement waiting 298696ab2d3SArnaldo Carvalho de Melo * only for 60sec, we should wait at least for 240 secs. 299696ab2d3SArnaldo Carvalho de Melo * Well, 240 consumes too much of resources 8) 300696ab2d3SArnaldo Carvalho de Melo * ] 301696ab2d3SArnaldo Carvalho de Melo * This interval is not reduced to catch old duplicate and 302696ab2d3SArnaldo Carvalho de Melo * responces to our wandering segments living for two MSLs. 303696ab2d3SArnaldo Carvalho de Melo * However, if we use PAWS to detect 304696ab2d3SArnaldo Carvalho de Melo * old duplicates, we can reduce the interval to bounds required 305696ab2d3SArnaldo Carvalho de Melo * by RTO, rather than MSL. So, if peer understands PAWS, we 306696ab2d3SArnaldo Carvalho de Melo * kill tw bucket after 3.5*RTO (it is important that this number 307696ab2d3SArnaldo Carvalho de Melo * is greater than TS tick!) and detect old duplicates with help 308696ab2d3SArnaldo Carvalho de Melo * of PAWS. 309696ab2d3SArnaldo Carvalho de Melo */ 310696ab2d3SArnaldo Carvalho de Melo slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; 311696ab2d3SArnaldo Carvalho de Melo 312696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 313696ab2d3SArnaldo Carvalho de Melo 314696ab2d3SArnaldo Carvalho de Melo /* Unlink it, if it was scheduled */ 315696ab2d3SArnaldo Carvalho de Melo if (inet_twsk_del_dead_node(tw)) 316696ab2d3SArnaldo Carvalho de Melo twdr->tw_count--; 317696ab2d3SArnaldo Carvalho de Melo else 318696ab2d3SArnaldo Carvalho de Melo atomic_inc(&tw->tw_refcnt); 319696ab2d3SArnaldo Carvalho de Melo 320696ab2d3SArnaldo Carvalho de Melo if (slot >= INET_TWDR_RECYCLE_SLOTS) { 321696ab2d3SArnaldo Carvalho de Melo /* Schedule to slow timer */ 322696ab2d3SArnaldo Carvalho de Melo if (timeo >= timewait_len) { 323696ab2d3SArnaldo Carvalho de Melo slot = INET_TWDR_TWKILL_SLOTS - 1; 324696ab2d3SArnaldo Carvalho de Melo } else { 325172589ccSIlpo Järvinen slot = DIV_ROUND_UP(timeo, twdr->period); 326696ab2d3SArnaldo Carvalho de Melo if (slot >= INET_TWDR_TWKILL_SLOTS) 327696ab2d3SArnaldo Carvalho de Melo slot = INET_TWDR_TWKILL_SLOTS - 1; 328696ab2d3SArnaldo Carvalho de Melo } 329696ab2d3SArnaldo Carvalho de Melo tw->tw_ttd = jiffies + timeo; 330696ab2d3SArnaldo Carvalho de Melo slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); 331696ab2d3SArnaldo Carvalho de Melo list = &twdr->cells[slot]; 332696ab2d3SArnaldo Carvalho de Melo } else { 333696ab2d3SArnaldo Carvalho de Melo tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); 334696ab2d3SArnaldo Carvalho de Melo 335696ab2d3SArnaldo Carvalho de Melo if (twdr->twcal_hand < 0) { 336696ab2d3SArnaldo Carvalho de Melo twdr->twcal_hand = 0; 337696ab2d3SArnaldo Carvalho de Melo twdr->twcal_jiffie = jiffies; 338696ab2d3SArnaldo Carvalho de Melo twdr->twcal_timer.expires = twdr->twcal_jiffie + 339696ab2d3SArnaldo Carvalho de Melo (slot << INET_TWDR_RECYCLE_TICK); 340696ab2d3SArnaldo Carvalho de Melo add_timer(&twdr->twcal_timer); 341696ab2d3SArnaldo Carvalho de Melo } else { 342696ab2d3SArnaldo Carvalho de Melo if (time_after(twdr->twcal_timer.expires, 343696ab2d3SArnaldo Carvalho de Melo jiffies + (slot << INET_TWDR_RECYCLE_TICK))) 344696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->twcal_timer, 345696ab2d3SArnaldo Carvalho de Melo jiffies + (slot << INET_TWDR_RECYCLE_TICK)); 346696ab2d3SArnaldo Carvalho de Melo slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); 347696ab2d3SArnaldo Carvalho de Melo } 348696ab2d3SArnaldo Carvalho de Melo list = &twdr->twcal_row[slot]; 349696ab2d3SArnaldo Carvalho de Melo } 350696ab2d3SArnaldo Carvalho de Melo 351696ab2d3SArnaldo Carvalho de Melo hlist_add_head(&tw->tw_death_node, list); 352696ab2d3SArnaldo Carvalho de Melo 353696ab2d3SArnaldo Carvalho de Melo if (twdr->tw_count++ == 0) 354696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->tw_timer, jiffies + twdr->period); 355696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 356696ab2d3SArnaldo Carvalho de Melo } 357696ab2d3SArnaldo Carvalho de Melo 358696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twsk_schedule); 359696ab2d3SArnaldo Carvalho de Melo 360696ab2d3SArnaldo Carvalho de Melo void inet_twdr_twcal_tick(unsigned long data) 361696ab2d3SArnaldo Carvalho de Melo { 362696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr; 363696ab2d3SArnaldo Carvalho de Melo int n, slot; 364696ab2d3SArnaldo Carvalho de Melo unsigned long j; 365696ab2d3SArnaldo Carvalho de Melo unsigned long now = jiffies; 366696ab2d3SArnaldo Carvalho de Melo int killed = 0; 367696ab2d3SArnaldo Carvalho de Melo int adv = 0; 368696ab2d3SArnaldo Carvalho de Melo 369696ab2d3SArnaldo Carvalho de Melo twdr = (struct inet_timewait_death_row *)data; 370696ab2d3SArnaldo Carvalho de Melo 371696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 372696ab2d3SArnaldo Carvalho de Melo if (twdr->twcal_hand < 0) 373696ab2d3SArnaldo Carvalho de Melo goto out; 374696ab2d3SArnaldo Carvalho de Melo 375696ab2d3SArnaldo Carvalho de Melo slot = twdr->twcal_hand; 376696ab2d3SArnaldo Carvalho de Melo j = twdr->twcal_jiffie; 377696ab2d3SArnaldo Carvalho de Melo 378696ab2d3SArnaldo Carvalho de Melo for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { 379696ab2d3SArnaldo Carvalho de Melo if (time_before_eq(j, now)) { 380696ab2d3SArnaldo Carvalho de Melo struct hlist_node *node, *safe; 381696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 382696ab2d3SArnaldo Carvalho de Melo 383696ab2d3SArnaldo Carvalho de Melo inet_twsk_for_each_inmate_safe(tw, node, safe, 384696ab2d3SArnaldo Carvalho de Melo &twdr->twcal_row[slot]) { 385696ab2d3SArnaldo Carvalho de Melo __inet_twsk_del_dead_node(tw); 386696ab2d3SArnaldo Carvalho de Melo __inet_twsk_kill(tw, twdr->hashinfo); 387f2bf415cSPavel Emelyanov #ifdef CONFIG_NET_NS 388f2bf415cSPavel Emelyanov NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); 389f2bf415cSPavel Emelyanov #endif 390696ab2d3SArnaldo Carvalho de Melo inet_twsk_put(tw); 391696ab2d3SArnaldo Carvalho de Melo killed++; 392696ab2d3SArnaldo Carvalho de Melo } 393696ab2d3SArnaldo Carvalho de Melo } else { 394696ab2d3SArnaldo Carvalho de Melo if (!adv) { 395696ab2d3SArnaldo Carvalho de Melo adv = 1; 396696ab2d3SArnaldo Carvalho de Melo twdr->twcal_jiffie = j; 397696ab2d3SArnaldo Carvalho de Melo twdr->twcal_hand = slot; 398696ab2d3SArnaldo Carvalho de Melo } 399696ab2d3SArnaldo Carvalho de Melo 400696ab2d3SArnaldo Carvalho de Melo if (!hlist_empty(&twdr->twcal_row[slot])) { 401696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->twcal_timer, j); 402696ab2d3SArnaldo Carvalho de Melo goto out; 403696ab2d3SArnaldo Carvalho de Melo } 404696ab2d3SArnaldo Carvalho de Melo } 405696ab2d3SArnaldo Carvalho de Melo j += 1 << INET_TWDR_RECYCLE_TICK; 406696ab2d3SArnaldo Carvalho de Melo slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); 407696ab2d3SArnaldo Carvalho de Melo } 408696ab2d3SArnaldo Carvalho de Melo twdr->twcal_hand = -1; 409696ab2d3SArnaldo Carvalho de Melo 410696ab2d3SArnaldo Carvalho de Melo out: 411696ab2d3SArnaldo Carvalho de Melo if ((twdr->tw_count -= killed) == 0) 412696ab2d3SArnaldo Carvalho de Melo del_timer(&twdr->tw_timer); 413f2bf415cSPavel Emelyanov #ifndef CONFIG_NET_NS 414f2bf415cSPavel Emelyanov NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); 415f2bf415cSPavel Emelyanov #endif 416696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 417696ab2d3SArnaldo Carvalho de Melo } 418696ab2d3SArnaldo Carvalho de Melo 419696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); 420d315492bSDaniel Lezcano 421d315492bSDaniel Lezcano void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, 422d315492bSDaniel Lezcano struct inet_timewait_death_row *twdr, int family) 423d315492bSDaniel Lezcano { 424d315492bSDaniel Lezcano struct inet_timewait_sock *tw; 425d315492bSDaniel Lezcano struct sock *sk; 4263ab5aee7SEric Dumazet struct hlist_nulls_node *node; 427d315492bSDaniel Lezcano int h; 428d315492bSDaniel Lezcano 429d315492bSDaniel Lezcano local_bh_disable(); 430d315492bSDaniel Lezcano for (h = 0; h < (hashinfo->ehash_size); h++) { 431d315492bSDaniel Lezcano struct inet_ehash_bucket *head = 432d315492bSDaniel Lezcano inet_ehash_bucket(hashinfo, h); 4339db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(hashinfo, h); 434d315492bSDaniel Lezcano restart: 4359db66bdcSEric Dumazet spin_lock(lock); 4363ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &head->twchain) { 437d315492bSDaniel Lezcano 438d315492bSDaniel Lezcano tw = inet_twsk(sk); 439d315492bSDaniel Lezcano if (!net_eq(twsk_net(tw), net) || 440d315492bSDaniel Lezcano tw->tw_family != family) 441d315492bSDaniel Lezcano continue; 442d315492bSDaniel Lezcano 443d315492bSDaniel Lezcano atomic_inc(&tw->tw_refcnt); 4449db66bdcSEric Dumazet spin_unlock(lock); 445d315492bSDaniel Lezcano inet_twsk_deschedule(tw, twdr); 446d315492bSDaniel Lezcano inet_twsk_put(tw); 447d315492bSDaniel Lezcano 448d315492bSDaniel Lezcano goto restart; 449d315492bSDaniel Lezcano } 4509db66bdcSEric Dumazet spin_unlock(lock); 451d315492bSDaniel Lezcano } 452d315492bSDaniel Lezcano local_bh_enable(); 453d315492bSDaniel Lezcano } 454d315492bSDaniel Lezcano EXPORT_SYMBOL_GPL(inet_twsk_purge); 455