1e48c414eSArnaldo Carvalho de Melo /* 2e48c414eSArnaldo Carvalho de Melo * INET An implementation of the TCP/IP protocol suite for the LINUX 3e48c414eSArnaldo Carvalho de Melo * operating system. INET is implemented using the BSD Socket 4e48c414eSArnaldo Carvalho de Melo * interface as the means of communication with the user level. 5e48c414eSArnaldo Carvalho de Melo * 6e48c414eSArnaldo Carvalho de Melo * Generic TIME_WAIT sockets functions 7e48c414eSArnaldo Carvalho de Melo * 8e48c414eSArnaldo Carvalho de Melo * From code orinally in TCP 9e48c414eSArnaldo Carvalho de Melo */ 10e48c414eSArnaldo Carvalho de Melo 11172589ccSIlpo Järvinen #include <linux/kernel.h> 12e48c414eSArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 13e48c414eSArnaldo Carvalho de Melo #include <net/inet_timewait_sock.h> 14696ab2d3SArnaldo Carvalho de Melo #include <net/ip.h> 15e48c414eSArnaldo Carvalho de Melo 16e48c414eSArnaldo Carvalho de Melo /* Must be called with locally disabled BHs. */ 17acd159b6SAdrian Bunk static void __inet_twsk_kill(struct inet_timewait_sock *tw, 18acd159b6SAdrian Bunk struct inet_hashinfo *hashinfo) 19e48c414eSArnaldo Carvalho de Melo { 20e48c414eSArnaldo Carvalho de Melo struct inet_bind_hashbucket *bhead; 21e48c414eSArnaldo Carvalho de Melo struct inet_bind_bucket *tb; 22e48c414eSArnaldo Carvalho de Melo /* Unlink from established hashes. */ 23230140cfSEric Dumazet rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 24e48c414eSArnaldo Carvalho de Melo 25230140cfSEric Dumazet write_lock(lock); 26e48c414eSArnaldo Carvalho de Melo if (hlist_unhashed(&tw->tw_node)) { 27230140cfSEric Dumazet write_unlock(lock); 28e48c414eSArnaldo Carvalho de Melo return; 29e48c414eSArnaldo Carvalho de Melo } 30e48c414eSArnaldo Carvalho de Melo __hlist_del(&tw->tw_node); 31e48c414eSArnaldo Carvalho de Melo sk_node_init(&tw->tw_node); 32230140cfSEric Dumazet write_unlock(lock); 33e48c414eSArnaldo Carvalho de Melo 34e48c414eSArnaldo Carvalho de Melo /* Disassociate with bind bucket. */ 357f635ab7SPavel Emelyanov bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, 367f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 37e48c414eSArnaldo Carvalho de Melo spin_lock(&bhead->lock); 38e48c414eSArnaldo Carvalho de Melo tb = tw->tw_tb; 39e48c414eSArnaldo Carvalho de Melo __hlist_del(&tw->tw_bind_node); 40e48c414eSArnaldo Carvalho de Melo tw->tw_tb = NULL; 41e48c414eSArnaldo Carvalho de Melo inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 42e48c414eSArnaldo Carvalho de Melo spin_unlock(&bhead->lock); 43e48c414eSArnaldo Carvalho de Melo #ifdef SOCK_REFCNT_DEBUG 44e48c414eSArnaldo Carvalho de Melo if (atomic_read(&tw->tw_refcnt) != 1) { 45e48c414eSArnaldo Carvalho de Melo printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", 46e48c414eSArnaldo Carvalho de Melo tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 47e48c414eSArnaldo Carvalho de Melo } 48e48c414eSArnaldo Carvalho de Melo #endif 49e48c414eSArnaldo Carvalho de Melo inet_twsk_put(tw); 50e48c414eSArnaldo Carvalho de Melo } 51e48c414eSArnaldo Carvalho de Melo 527054fb93SPavel Emelyanov void inet_twsk_put(struct inet_timewait_sock *tw) 537054fb93SPavel Emelyanov { 547054fb93SPavel Emelyanov if (atomic_dec_and_test(&tw->tw_refcnt)) { 557054fb93SPavel Emelyanov struct module *owner = tw->tw_prot->owner; 567054fb93SPavel Emelyanov twsk_destructor((struct sock *)tw); 577054fb93SPavel Emelyanov #ifdef SOCK_REFCNT_DEBUG 587054fb93SPavel Emelyanov printk(KERN_DEBUG "%s timewait_sock %p released\n", 597054fb93SPavel Emelyanov tw->tw_prot->name, tw); 607054fb93SPavel Emelyanov #endif 61cd5342d9SDenis V. Lunev release_net(twsk_net(tw)); 627054fb93SPavel Emelyanov kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); 637054fb93SPavel Emelyanov module_put(owner); 647054fb93SPavel Emelyanov } 657054fb93SPavel Emelyanov } 667054fb93SPavel Emelyanov EXPORT_SYMBOL_GPL(inet_twsk_put); 677054fb93SPavel Emelyanov 68e48c414eSArnaldo Carvalho de Melo /* 69e48c414eSArnaldo Carvalho de Melo * Enter the time wait state. This is called with locally disabled BH. 70e48c414eSArnaldo Carvalho de Melo * Essentially we whip up a timewait bucket, copy the relevant info into it 71e48c414eSArnaldo Carvalho de Melo * from the SK, and mess with hash chains and list linkage. 72e48c414eSArnaldo Carvalho de Melo */ 73e48c414eSArnaldo Carvalho de Melo void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, 74e48c414eSArnaldo Carvalho de Melo struct inet_hashinfo *hashinfo) 75e48c414eSArnaldo Carvalho de Melo { 76e48c414eSArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 77463c84b9SArnaldo Carvalho de Melo const struct inet_connection_sock *icsk = inet_csk(sk); 7881c3d547SEric Dumazet struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); 79230140cfSEric Dumazet rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 80e48c414eSArnaldo Carvalho de Melo struct inet_bind_hashbucket *bhead; 81e48c414eSArnaldo Carvalho de Melo /* Step 1: Put TW into bind hash. Original socket stays there too. 82e48c414eSArnaldo Carvalho de Melo Note, that any socket with inet->num != 0 MUST be bound in 83e48c414eSArnaldo Carvalho de Melo binding cache, even if it is closed. 84e48c414eSArnaldo Carvalho de Melo */ 857f635ab7SPavel Emelyanov bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, 867f635ab7SPavel Emelyanov hashinfo->bhash_size)]; 87e48c414eSArnaldo Carvalho de Melo spin_lock(&bhead->lock); 88463c84b9SArnaldo Carvalho de Melo tw->tw_tb = icsk->icsk_bind_hash; 89463c84b9SArnaldo Carvalho de Melo BUG_TRAP(icsk->icsk_bind_hash); 90e48c414eSArnaldo Carvalho de Melo inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); 91e48c414eSArnaldo Carvalho de Melo spin_unlock(&bhead->lock); 92e48c414eSArnaldo Carvalho de Melo 93230140cfSEric Dumazet write_lock(lock); 94e48c414eSArnaldo Carvalho de Melo 95e48c414eSArnaldo Carvalho de Melo /* Step 2: Remove SK from established hash. */ 96e48c414eSArnaldo Carvalho de Melo if (__sk_del_node_init(sk)) 97c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 98e48c414eSArnaldo Carvalho de Melo 99dbca9b27SEric Dumazet /* Step 3: Hash TW into TIMEWAIT chain. */ 100dbca9b27SEric Dumazet inet_twsk_add_node(tw, &ehead->twchain); 101e48c414eSArnaldo Carvalho de Melo atomic_inc(&tw->tw_refcnt); 102e48c414eSArnaldo Carvalho de Melo 103230140cfSEric Dumazet write_unlock(lock); 104e48c414eSArnaldo Carvalho de Melo } 105c676270bSArnaldo Carvalho de Melo 106696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); 107696ab2d3SArnaldo Carvalho de Melo 108c676270bSArnaldo Carvalho de Melo struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) 109c676270bSArnaldo Carvalho de Melo { 1106d6ee43eSArnaldo Carvalho de Melo struct inet_timewait_sock *tw = 1116d6ee43eSArnaldo Carvalho de Melo kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, 11254e6ecb2SChristoph Lameter GFP_ATOMIC); 113c676270bSArnaldo Carvalho de Melo if (tw != NULL) { 114c676270bSArnaldo Carvalho de Melo const struct inet_sock *inet = inet_sk(sk); 115c676270bSArnaldo Carvalho de Melo 116c676270bSArnaldo Carvalho de Melo /* Give us an identity. */ 117c676270bSArnaldo Carvalho de Melo tw->tw_daddr = inet->daddr; 118c676270bSArnaldo Carvalho de Melo tw->tw_rcv_saddr = inet->rcv_saddr; 119c676270bSArnaldo Carvalho de Melo tw->tw_bound_dev_if = sk->sk_bound_dev_if; 120c676270bSArnaldo Carvalho de Melo tw->tw_num = inet->num; 121c676270bSArnaldo Carvalho de Melo tw->tw_state = TCP_TIME_WAIT; 122c676270bSArnaldo Carvalho de Melo tw->tw_substate = state; 123c676270bSArnaldo Carvalho de Melo tw->tw_sport = inet->sport; 124c676270bSArnaldo Carvalho de Melo tw->tw_dport = inet->dport; 125c676270bSArnaldo Carvalho de Melo tw->tw_family = sk->sk_family; 126c676270bSArnaldo Carvalho de Melo tw->tw_reuse = sk->sk_reuse; 12781c3d547SEric Dumazet tw->tw_hash = sk->sk_hash; 128c676270bSArnaldo Carvalho de Melo tw->tw_ipv6only = 0; 129c676270bSArnaldo Carvalho de Melo tw->tw_prot = sk->sk_prot_creator; 130cd5342d9SDenis V. Lunev twsk_net_set(tw, hold_net(sock_net(sk))); 131c676270bSArnaldo Carvalho de Melo atomic_set(&tw->tw_refcnt, 1); 132c676270bSArnaldo Carvalho de Melo inet_twsk_dead_node_init(tw); 133eeb2b856SArnaldo Carvalho de Melo __module_get(tw->tw_prot->owner); 134c676270bSArnaldo Carvalho de Melo } 135c676270bSArnaldo Carvalho de Melo 136c676270bSArnaldo Carvalho de Melo return tw; 137c676270bSArnaldo Carvalho de Melo } 138696ab2d3SArnaldo Carvalho de Melo 139696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twsk_alloc); 140696ab2d3SArnaldo Carvalho de Melo 141696ab2d3SArnaldo Carvalho de Melo /* Returns non-zero if quota exceeded. */ 142696ab2d3SArnaldo Carvalho de Melo static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, 143696ab2d3SArnaldo Carvalho de Melo const int slot) 144696ab2d3SArnaldo Carvalho de Melo { 145696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 146696ab2d3SArnaldo Carvalho de Melo struct hlist_node *node; 147696ab2d3SArnaldo Carvalho de Melo unsigned int killed; 148696ab2d3SArnaldo Carvalho de Melo int ret; 149696ab2d3SArnaldo Carvalho de Melo 150696ab2d3SArnaldo Carvalho de Melo /* NOTE: compare this to previous version where lock 151696ab2d3SArnaldo Carvalho de Melo * was released after detaching chain. It was racy, 152696ab2d3SArnaldo Carvalho de Melo * because tw buckets are scheduled in not serialized context 153696ab2d3SArnaldo Carvalho de Melo * in 2.3 (with netfilter), and with softnet it is common, because 154696ab2d3SArnaldo Carvalho de Melo * soft irqs are not sequenced. 155696ab2d3SArnaldo Carvalho de Melo */ 156696ab2d3SArnaldo Carvalho de Melo killed = 0; 157696ab2d3SArnaldo Carvalho de Melo ret = 0; 158696ab2d3SArnaldo Carvalho de Melo rescan: 159696ab2d3SArnaldo Carvalho de Melo inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { 160696ab2d3SArnaldo Carvalho de Melo __inet_twsk_del_dead_node(tw); 161696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 162696ab2d3SArnaldo Carvalho de Melo __inet_twsk_kill(tw, twdr->hashinfo); 163*f2bf415cSPavel Emelyanov #ifdef CONFIG_NET_NS 164*f2bf415cSPavel Emelyanov NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); 165*f2bf415cSPavel Emelyanov #endif 166696ab2d3SArnaldo Carvalho de Melo inet_twsk_put(tw); 167696ab2d3SArnaldo Carvalho de Melo killed++; 168696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 169696ab2d3SArnaldo Carvalho de Melo if (killed > INET_TWDR_TWKILL_QUOTA) { 170696ab2d3SArnaldo Carvalho de Melo ret = 1; 171696ab2d3SArnaldo Carvalho de Melo break; 172696ab2d3SArnaldo Carvalho de Melo } 173696ab2d3SArnaldo Carvalho de Melo 174696ab2d3SArnaldo Carvalho de Melo /* While we dropped twdr->death_lock, another cpu may have 175696ab2d3SArnaldo Carvalho de Melo * killed off the next TW bucket in the list, therefore 176696ab2d3SArnaldo Carvalho de Melo * do a fresh re-read of the hlist head node with the 177696ab2d3SArnaldo Carvalho de Melo * lock reacquired. We still use the hlist traversal 178696ab2d3SArnaldo Carvalho de Melo * macro in order to get the prefetches. 179696ab2d3SArnaldo Carvalho de Melo */ 180696ab2d3SArnaldo Carvalho de Melo goto rescan; 181696ab2d3SArnaldo Carvalho de Melo } 182696ab2d3SArnaldo Carvalho de Melo 183696ab2d3SArnaldo Carvalho de Melo twdr->tw_count -= killed; 184*f2bf415cSPavel Emelyanov #ifndef CONFIG_NET_NS 185*f2bf415cSPavel Emelyanov NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); 186*f2bf415cSPavel Emelyanov #endif 187696ab2d3SArnaldo Carvalho de Melo return ret; 188696ab2d3SArnaldo Carvalho de Melo } 189696ab2d3SArnaldo Carvalho de Melo 190696ab2d3SArnaldo Carvalho de Melo void inet_twdr_hangman(unsigned long data) 191696ab2d3SArnaldo Carvalho de Melo { 192696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr; 193696ab2d3SArnaldo Carvalho de Melo int unsigned need_timer; 194696ab2d3SArnaldo Carvalho de Melo 195696ab2d3SArnaldo Carvalho de Melo twdr = (struct inet_timewait_death_row *)data; 196696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 197696ab2d3SArnaldo Carvalho de Melo 198696ab2d3SArnaldo Carvalho de Melo if (twdr->tw_count == 0) 199696ab2d3SArnaldo Carvalho de Melo goto out; 200696ab2d3SArnaldo Carvalho de Melo 201696ab2d3SArnaldo Carvalho de Melo need_timer = 0; 202696ab2d3SArnaldo Carvalho de Melo if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { 203696ab2d3SArnaldo Carvalho de Melo twdr->thread_slots |= (1 << twdr->slot); 204696ab2d3SArnaldo Carvalho de Melo schedule_work(&twdr->twkill_work); 205696ab2d3SArnaldo Carvalho de Melo need_timer = 1; 206696ab2d3SArnaldo Carvalho de Melo } else { 207696ab2d3SArnaldo Carvalho de Melo /* We purged the entire slot, anything left? */ 208696ab2d3SArnaldo Carvalho de Melo if (twdr->tw_count) 209696ab2d3SArnaldo Carvalho de Melo need_timer = 1; 210696ab2d3SArnaldo Carvalho de Melo } 211696ab2d3SArnaldo Carvalho de Melo twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); 212696ab2d3SArnaldo Carvalho de Melo if (need_timer) 213696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->tw_timer, jiffies + twdr->period); 214696ab2d3SArnaldo Carvalho de Melo out: 215696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 216696ab2d3SArnaldo Carvalho de Melo } 217696ab2d3SArnaldo Carvalho de Melo 218696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twdr_hangman); 219696ab2d3SArnaldo Carvalho de Melo 22065f27f38SDavid Howells void inet_twdr_twkill_work(struct work_struct *work) 221696ab2d3SArnaldo Carvalho de Melo { 22265f27f38SDavid Howells struct inet_timewait_death_row *twdr = 22365f27f38SDavid Howells container_of(work, struct inet_timewait_death_row, twkill_work); 224696ab2d3SArnaldo Carvalho de Melo int i; 225696ab2d3SArnaldo Carvalho de Melo 22695c9382aSPavel Emelyanov BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) > 22795c9382aSPavel Emelyanov (sizeof(twdr->thread_slots) * 8)); 228696ab2d3SArnaldo Carvalho de Melo 229696ab2d3SArnaldo Carvalho de Melo while (twdr->thread_slots) { 230696ab2d3SArnaldo Carvalho de Melo spin_lock_bh(&twdr->death_lock); 231696ab2d3SArnaldo Carvalho de Melo for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { 232696ab2d3SArnaldo Carvalho de Melo if (!(twdr->thread_slots & (1 << i))) 233696ab2d3SArnaldo Carvalho de Melo continue; 234696ab2d3SArnaldo Carvalho de Melo 235696ab2d3SArnaldo Carvalho de Melo while (inet_twdr_do_twkill_work(twdr, i) != 0) { 236696ab2d3SArnaldo Carvalho de Melo if (need_resched()) { 237696ab2d3SArnaldo Carvalho de Melo spin_unlock_bh(&twdr->death_lock); 238696ab2d3SArnaldo Carvalho de Melo schedule(); 239696ab2d3SArnaldo Carvalho de Melo spin_lock_bh(&twdr->death_lock); 240696ab2d3SArnaldo Carvalho de Melo } 241696ab2d3SArnaldo Carvalho de Melo } 242696ab2d3SArnaldo Carvalho de Melo 243696ab2d3SArnaldo Carvalho de Melo twdr->thread_slots &= ~(1 << i); 244696ab2d3SArnaldo Carvalho de Melo } 245696ab2d3SArnaldo Carvalho de Melo spin_unlock_bh(&twdr->death_lock); 246696ab2d3SArnaldo Carvalho de Melo } 247696ab2d3SArnaldo Carvalho de Melo } 248696ab2d3SArnaldo Carvalho de Melo 249696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); 250696ab2d3SArnaldo Carvalho de Melo 251696ab2d3SArnaldo Carvalho de Melo /* These are always called from BH context. See callers in 252696ab2d3SArnaldo Carvalho de Melo * tcp_input.c to verify this. 253696ab2d3SArnaldo Carvalho de Melo */ 254696ab2d3SArnaldo Carvalho de Melo 255696ab2d3SArnaldo Carvalho de Melo /* This is for handling early-kills of TIME_WAIT sockets. */ 256696ab2d3SArnaldo Carvalho de Melo void inet_twsk_deschedule(struct inet_timewait_sock *tw, 257696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr) 258696ab2d3SArnaldo Carvalho de Melo { 259696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 260696ab2d3SArnaldo Carvalho de Melo if (inet_twsk_del_dead_node(tw)) { 261696ab2d3SArnaldo Carvalho de Melo inet_twsk_put(tw); 262696ab2d3SArnaldo Carvalho de Melo if (--twdr->tw_count == 0) 263696ab2d3SArnaldo Carvalho de Melo del_timer(&twdr->tw_timer); 264696ab2d3SArnaldo Carvalho de Melo } 265696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 266696ab2d3SArnaldo Carvalho de Melo __inet_twsk_kill(tw, twdr->hashinfo); 267696ab2d3SArnaldo Carvalho de Melo } 268696ab2d3SArnaldo Carvalho de Melo 269696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL(inet_twsk_deschedule); 270696ab2d3SArnaldo Carvalho de Melo 271696ab2d3SArnaldo Carvalho de Melo void inet_twsk_schedule(struct inet_timewait_sock *tw, 272696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr, 273696ab2d3SArnaldo Carvalho de Melo const int timeo, const int timewait_len) 274696ab2d3SArnaldo Carvalho de Melo { 275696ab2d3SArnaldo Carvalho de Melo struct hlist_head *list; 276696ab2d3SArnaldo Carvalho de Melo int slot; 277696ab2d3SArnaldo Carvalho de Melo 278696ab2d3SArnaldo Carvalho de Melo /* timeout := RTO * 3.5 279696ab2d3SArnaldo Carvalho de Melo * 280696ab2d3SArnaldo Carvalho de Melo * 3.5 = 1+2+0.5 to wait for two retransmits. 281696ab2d3SArnaldo Carvalho de Melo * 282696ab2d3SArnaldo Carvalho de Melo * RATIONALE: if FIN arrived and we entered TIME-WAIT state, 283696ab2d3SArnaldo Carvalho de Melo * our ACK acking that FIN can be lost. If N subsequent retransmitted 284696ab2d3SArnaldo Carvalho de Melo * FINs (or previous seqments) are lost (probability of such event 285696ab2d3SArnaldo Carvalho de Melo * is p^(N+1), where p is probability to lose single packet and 286696ab2d3SArnaldo Carvalho de Melo * time to detect the loss is about RTO*(2^N - 1) with exponential 287696ab2d3SArnaldo Carvalho de Melo * backoff). Normal timewait length is calculated so, that we 288696ab2d3SArnaldo Carvalho de Melo * waited at least for one retransmitted FIN (maximal RTO is 120sec). 289696ab2d3SArnaldo Carvalho de Melo * [ BTW Linux. following BSD, violates this requirement waiting 290696ab2d3SArnaldo Carvalho de Melo * only for 60sec, we should wait at least for 240 secs. 291696ab2d3SArnaldo Carvalho de Melo * Well, 240 consumes too much of resources 8) 292696ab2d3SArnaldo Carvalho de Melo * ] 293696ab2d3SArnaldo Carvalho de Melo * This interval is not reduced to catch old duplicate and 294696ab2d3SArnaldo Carvalho de Melo * responces to our wandering segments living for two MSLs. 295696ab2d3SArnaldo Carvalho de Melo * However, if we use PAWS to detect 296696ab2d3SArnaldo Carvalho de Melo * old duplicates, we can reduce the interval to bounds required 297696ab2d3SArnaldo Carvalho de Melo * by RTO, rather than MSL. So, if peer understands PAWS, we 298696ab2d3SArnaldo Carvalho de Melo * kill tw bucket after 3.5*RTO (it is important that this number 299696ab2d3SArnaldo Carvalho de Melo * is greater than TS tick!) and detect old duplicates with help 300696ab2d3SArnaldo Carvalho de Melo * of PAWS. 301696ab2d3SArnaldo Carvalho de Melo */ 302696ab2d3SArnaldo Carvalho de Melo slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; 303696ab2d3SArnaldo Carvalho de Melo 304696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 305696ab2d3SArnaldo Carvalho de Melo 306696ab2d3SArnaldo Carvalho de Melo /* Unlink it, if it was scheduled */ 307696ab2d3SArnaldo Carvalho de Melo if (inet_twsk_del_dead_node(tw)) 308696ab2d3SArnaldo Carvalho de Melo twdr->tw_count--; 309696ab2d3SArnaldo Carvalho de Melo else 310696ab2d3SArnaldo Carvalho de Melo atomic_inc(&tw->tw_refcnt); 311696ab2d3SArnaldo Carvalho de Melo 312696ab2d3SArnaldo Carvalho de Melo if (slot >= INET_TWDR_RECYCLE_SLOTS) { 313696ab2d3SArnaldo Carvalho de Melo /* Schedule to slow timer */ 314696ab2d3SArnaldo Carvalho de Melo if (timeo >= timewait_len) { 315696ab2d3SArnaldo Carvalho de Melo slot = INET_TWDR_TWKILL_SLOTS - 1; 316696ab2d3SArnaldo Carvalho de Melo } else { 317172589ccSIlpo Järvinen slot = DIV_ROUND_UP(timeo, twdr->period); 318696ab2d3SArnaldo Carvalho de Melo if (slot >= INET_TWDR_TWKILL_SLOTS) 319696ab2d3SArnaldo Carvalho de Melo slot = INET_TWDR_TWKILL_SLOTS - 1; 320696ab2d3SArnaldo Carvalho de Melo } 321696ab2d3SArnaldo Carvalho de Melo tw->tw_ttd = jiffies + timeo; 322696ab2d3SArnaldo Carvalho de Melo slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); 323696ab2d3SArnaldo Carvalho de Melo list = &twdr->cells[slot]; 324696ab2d3SArnaldo Carvalho de Melo } else { 325696ab2d3SArnaldo Carvalho de Melo tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); 326696ab2d3SArnaldo Carvalho de Melo 327696ab2d3SArnaldo Carvalho de Melo if (twdr->twcal_hand < 0) { 328696ab2d3SArnaldo Carvalho de Melo twdr->twcal_hand = 0; 329696ab2d3SArnaldo Carvalho de Melo twdr->twcal_jiffie = jiffies; 330696ab2d3SArnaldo Carvalho de Melo twdr->twcal_timer.expires = twdr->twcal_jiffie + 331696ab2d3SArnaldo Carvalho de Melo (slot << INET_TWDR_RECYCLE_TICK); 332696ab2d3SArnaldo Carvalho de Melo add_timer(&twdr->twcal_timer); 333696ab2d3SArnaldo Carvalho de Melo } else { 334696ab2d3SArnaldo Carvalho de Melo if (time_after(twdr->twcal_timer.expires, 335696ab2d3SArnaldo Carvalho de Melo jiffies + (slot << INET_TWDR_RECYCLE_TICK))) 336696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->twcal_timer, 337696ab2d3SArnaldo Carvalho de Melo jiffies + (slot << INET_TWDR_RECYCLE_TICK)); 338696ab2d3SArnaldo Carvalho de Melo slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); 339696ab2d3SArnaldo Carvalho de Melo } 340696ab2d3SArnaldo Carvalho de Melo list = &twdr->twcal_row[slot]; 341696ab2d3SArnaldo Carvalho de Melo } 342696ab2d3SArnaldo Carvalho de Melo 343696ab2d3SArnaldo Carvalho de Melo hlist_add_head(&tw->tw_death_node, list); 344696ab2d3SArnaldo Carvalho de Melo 345696ab2d3SArnaldo Carvalho de Melo if (twdr->tw_count++ == 0) 346696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->tw_timer, jiffies + twdr->period); 347696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 348696ab2d3SArnaldo Carvalho de Melo } 349696ab2d3SArnaldo Carvalho de Melo 350696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twsk_schedule); 351696ab2d3SArnaldo Carvalho de Melo 352696ab2d3SArnaldo Carvalho de Melo void inet_twdr_twcal_tick(unsigned long data) 353696ab2d3SArnaldo Carvalho de Melo { 354696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_death_row *twdr; 355696ab2d3SArnaldo Carvalho de Melo int n, slot; 356696ab2d3SArnaldo Carvalho de Melo unsigned long j; 357696ab2d3SArnaldo Carvalho de Melo unsigned long now = jiffies; 358696ab2d3SArnaldo Carvalho de Melo int killed = 0; 359696ab2d3SArnaldo Carvalho de Melo int adv = 0; 360696ab2d3SArnaldo Carvalho de Melo 361696ab2d3SArnaldo Carvalho de Melo twdr = (struct inet_timewait_death_row *)data; 362696ab2d3SArnaldo Carvalho de Melo 363696ab2d3SArnaldo Carvalho de Melo spin_lock(&twdr->death_lock); 364696ab2d3SArnaldo Carvalho de Melo if (twdr->twcal_hand < 0) 365696ab2d3SArnaldo Carvalho de Melo goto out; 366696ab2d3SArnaldo Carvalho de Melo 367696ab2d3SArnaldo Carvalho de Melo slot = twdr->twcal_hand; 368696ab2d3SArnaldo Carvalho de Melo j = twdr->twcal_jiffie; 369696ab2d3SArnaldo Carvalho de Melo 370696ab2d3SArnaldo Carvalho de Melo for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { 371696ab2d3SArnaldo Carvalho de Melo if (time_before_eq(j, now)) { 372696ab2d3SArnaldo Carvalho de Melo struct hlist_node *node, *safe; 373696ab2d3SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 374696ab2d3SArnaldo Carvalho de Melo 375696ab2d3SArnaldo Carvalho de Melo inet_twsk_for_each_inmate_safe(tw, node, safe, 376696ab2d3SArnaldo Carvalho de Melo &twdr->twcal_row[slot]) { 377696ab2d3SArnaldo Carvalho de Melo __inet_twsk_del_dead_node(tw); 378696ab2d3SArnaldo Carvalho de Melo __inet_twsk_kill(tw, twdr->hashinfo); 379*f2bf415cSPavel Emelyanov #ifdef CONFIG_NET_NS 380*f2bf415cSPavel Emelyanov NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); 381*f2bf415cSPavel Emelyanov #endif 382696ab2d3SArnaldo Carvalho de Melo inet_twsk_put(tw); 383696ab2d3SArnaldo Carvalho de Melo killed++; 384696ab2d3SArnaldo Carvalho de Melo } 385696ab2d3SArnaldo Carvalho de Melo } else { 386696ab2d3SArnaldo Carvalho de Melo if (!adv) { 387696ab2d3SArnaldo Carvalho de Melo adv = 1; 388696ab2d3SArnaldo Carvalho de Melo twdr->twcal_jiffie = j; 389696ab2d3SArnaldo Carvalho de Melo twdr->twcal_hand = slot; 390696ab2d3SArnaldo Carvalho de Melo } 391696ab2d3SArnaldo Carvalho de Melo 392696ab2d3SArnaldo Carvalho de Melo if (!hlist_empty(&twdr->twcal_row[slot])) { 393696ab2d3SArnaldo Carvalho de Melo mod_timer(&twdr->twcal_timer, j); 394696ab2d3SArnaldo Carvalho de Melo goto out; 395696ab2d3SArnaldo Carvalho de Melo } 396696ab2d3SArnaldo Carvalho de Melo } 397696ab2d3SArnaldo Carvalho de Melo j += 1 << INET_TWDR_RECYCLE_TICK; 398696ab2d3SArnaldo Carvalho de Melo slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); 399696ab2d3SArnaldo Carvalho de Melo } 400696ab2d3SArnaldo Carvalho de Melo twdr->twcal_hand = -1; 401696ab2d3SArnaldo Carvalho de Melo 402696ab2d3SArnaldo Carvalho de Melo out: 403696ab2d3SArnaldo Carvalho de Melo if ((twdr->tw_count -= killed) == 0) 404696ab2d3SArnaldo Carvalho de Melo del_timer(&twdr->tw_timer); 405*f2bf415cSPavel Emelyanov #ifndef CONFIG_NET_NS 406*f2bf415cSPavel Emelyanov NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); 407*f2bf415cSPavel Emelyanov #endif 408696ab2d3SArnaldo Carvalho de Melo spin_unlock(&twdr->death_lock); 409696ab2d3SArnaldo Carvalho de Melo } 410696ab2d3SArnaldo Carvalho de Melo 411696ab2d3SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); 412