xref: /linux/net/ipv4/tcp_timer.c (revision 20d0021394c1b070bf04b22c5bc8fdb437edd4c5)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  * Version:	$Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *		Corey Minyard <wf-rch!minyard@relay.EU.net>
14  *		Florian La Roche, <flla@stud.uni-sb.de>
15  *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
16  *		Linus Torvalds, <torvalds@cs.helsinki.fi>
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Matthew Dillon, <dillon@apollo.west.oic.com>
19  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
20  *		Jorge Cwik, <jorge@laser.satlink.net>
21  */
22 
23 #include <linux/module.h>
24 #include <net/tcp.h>
25 
26 int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
27 int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
28 int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
29 int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
30 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
31 int sysctl_tcp_retries1 = TCP_RETR1;
32 int sysctl_tcp_retries2 = TCP_RETR2;
33 int sysctl_tcp_orphan_retries;
34 
35 static void tcp_write_timer(unsigned long);
36 static void tcp_delack_timer(unsigned long);
37 static void tcp_keepalive_timer (unsigned long data);
38 
39 #ifdef TCP_DEBUG
40 const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
41 EXPORT_SYMBOL(tcp_timer_bug_msg);
42 #endif
43 
44 /*
45  * We use different timers for retransmits, delayed acks and probes.
46  * We may wish to use just one timer maintaining a list of expiry
47  * jiffies to optimize.
48  */
49 
50 void tcp_init_xmit_timers(struct sock *sk)
51 {
52 	struct tcp_sock *tp = tcp_sk(sk);
53 
54 	init_timer(&tp->retransmit_timer);
55 	tp->retransmit_timer.function=&tcp_write_timer;
56 	tp->retransmit_timer.data = (unsigned long) sk;
57 	tp->pending = 0;
58 
59 	init_timer(&tp->delack_timer);
60 	tp->delack_timer.function=&tcp_delack_timer;
61 	tp->delack_timer.data = (unsigned long) sk;
62 	tp->ack.pending = 0;
63 
64 	init_timer(&sk->sk_timer);
65 	sk->sk_timer.function	= &tcp_keepalive_timer;
66 	sk->sk_timer.data	= (unsigned long)sk;
67 }
68 
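/* Stop all three timers (retransmit, delayed ACK and keepalive) and clear
 * any pending timer events; used when the socket is being torn down.
 */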
69 void tcp_clear_xmit_timers(struct sock *sk)
70 {
71 	struct tcp_sock *tp = tcp_sk(sk);
72 
73 	tp->pending = 0;
74 	sk_stop_timer(sk, &tp->retransmit_timer);
75 
76 	tp->ack.pending = 0;
77 	tp->ack.blocked = 0;
78 	sk_stop_timer(sk, &tp->delack_timer);
79 
80 	sk_stop_timer(sk, &sk->sk_timer);
81 }
82 
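/* Abort the connection after a fatal timeout: report the pending soft
 * error (or ETIMEDOUT) to the user, then close the socket via tcp_done().
 */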
83 static void tcp_write_err(struct sock *sk)
84 {
85 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
86 	sk->sk_error_report(sk);
87 
88 	tcp_done(sk);
89 	NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
90 }
91 
92 /* Do not allow orphaned sockets to eat all our resources.
93  * This is a direct violation of the TCP specs, but it is required
94  * to prevent DoS attacks. It is called when a retransmission timeout
95  * or zero window probe timeout occurs on an orphaned socket.
96  *
97  * The criterion is still not confirmed experimentally and may change.
98  * We kill the socket if:
99  * 1. the number of orphaned sockets exceeds an administratively
100  *    configured limit, or
101  * 2. we are under strong memory pressure.
102  */
103 static int tcp_out_of_resources(struct sock *sk, int do_reset)
104 {
105 	struct tcp_sock *tp = tcp_sk(sk);
106 	int orphans = atomic_read(&tcp_orphan_count);
107 
108 	/* If the peer has not opened its window for a long time, or has not
109 	 * transmitted anything for a long time, penalize it. */
110 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
111 		orphans <<= 1;
112 
113 	/* If some dubious ICMP arrived, penalize even more. */
114 	if (sk->sk_err_soft)
115 		orphans <<= 1;
116 
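	/* The shifts above double the effective orphan count, so penalized
	 * sockets hit the sysctl_tcp_max_orphans limit below sooner. */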
117 	if (orphans >= sysctl_tcp_max_orphans ||
118 	    (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
119 	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
120 		if (net_ratelimit())
121 			printk(KERN_INFO "Out of socket memory\n");
122 
123 		/* Catch exceptional cases where the connection requires a reset:
124 		 *      1. The last segment was sent recently. */
125 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
126 		    /*  2. The window is closed. */
127 		    (!tp->snd_wnd && !tp->packets_out))
128 			do_reset = 1;
129 		if (do_reset)
130 			tcp_send_active_reset(sk, GFP_ATOMIC);
131 		tcp_done(sk);
132 		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
133 		return 1;
134 	}
135 	return 0;
136 }
137 
138 /* Calculate the maximal number of retries on an orphaned socket. */
139 static int tcp_orphan_retries(struct sock *sk, int alive)
140 {
141 	int retries = sysctl_tcp_orphan_retries; /* May be zero. */
142 
143 	/* We know from an ICMP that something is wrong. */
144 	if (sk->sk_err_soft && !alive)
145 		retries = 0;
146 
147 	/* However, if the socket sent something recently, select a safe
148 	 * number of retries. 8 corresponds to >100 seconds with a minimal
149 	 * RTO of 200 msec. */
150 	if (retries == 0 && alive)
151 		retries = 8;
152 	return retries;
153 }
154 
155 /* A write timeout has occurred. Process the after effects. */
156 static int tcp_write_timeout(struct sock *sk)
157 {
158 	struct tcp_sock *tp = tcp_sk(sk);
159 	int retry_until;
160 
161 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
162 		if (tp->retransmits)
163 			dst_negative_advice(&sk->sk_dst_cache);
164 		retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
165 	} else {
166 		if (tp->retransmits >= sysctl_tcp_retries1) {
167 			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black
168 			   hole detection. :-(
169 
170 			   This is the place to implement it. It is not implemented.
171 			   I do not want to implement it. It is disgusting. It does
172 			   not work in any case. Let me cite the same draft, which
173 			   requires us to implement this:
174 
175    "The one security concern raised by this memo is that ICMP black holes
176    are often caused by over-zealous security administrators who block
177    all ICMP messages.  It is vitally important that those who design and
178    deploy security systems understand the impact of strict filtering on
179    upper-layer protocols.  The safest web site in the world is worthless
180    if most TCP implementations cannot transfer data from it.  It would
181    be far nicer to have all of the black holes fixed rather than fixing
182    all of the TCP implementations."
183 
184                            Golden words :-).
185 		   */
186 
187 			dst_negative_advice(&sk->sk_dst_cache);
188 		}
189 
190 		retry_until = sysctl_tcp_retries2;
191 		if (sock_flag(sk, SOCK_DEAD)) {
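			/* The orphan is considered alive as long as the
			 * backed-off RTO has not yet reached its ceiling. */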
192 			int alive = (tp->rto < TCP_RTO_MAX);
193 
194 			retry_until = tcp_orphan_retries(sk, alive);
195 
196 			if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
197 				return 1;
198 		}
199 	}
200 
201 	if (tp->retransmits >= retry_until) {
202 		/* Has it gone just too far? */
203 		tcp_write_err(sk);
204 		return 1;
205 	}
206 	return 0;
207 }
208 
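/* Delayed ACK timer.  If the socket is locked by the user, back off and
 * retry shortly; otherwise drain the prequeue, send the overdue ACK and
 * adjust the ACK timeout (ato) and pingpong state.
 */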
209 static void tcp_delack_timer(unsigned long data)
210 {
211 	struct sock *sk = (struct sock*)data;
212 	struct tcp_sock *tp = tcp_sk(sk);
213 
214 	bh_lock_sock(sk);
215 	if (sock_owned_by_user(sk)) {
216 		/* Try again later. */
217 		tp->ack.blocked = 1;
218 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
219 		sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN);
220 		goto out_unlock;
221 	}
222 
223 	sk_stream_mem_reclaim(sk);
224 
225 	if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER))
226 		goto out;
227 
228 	if (time_after(tp->ack.timeout, jiffies)) {
229 		sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
230 		goto out;
231 	}
232 	tp->ack.pending &= ~TCP_ACK_TIMER;
233 
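	/* Segments still sitting on the prequeue were never picked up by the
	 * user task; push them through the regular receive path before the
	 * overdue ACK is sent. */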
234 	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
235 		struct sk_buff *skb;
236 
237 		NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED);
238 
239 		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
240 			sk->sk_backlog_rcv(sk, skb);
241 
242 		tp->ucopy.memory = 0;
243 	}
244 
245 	if (tcp_ack_scheduled(tp)) {
246 		if (!tp->ack.pingpong) {
247 			/* Delayed ACK missed: inflate ATO. */
248 			tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
249 		} else {
250 			/* Delayed ACK missed: leave pingpong mode and
251 			 * deflate ATO.
252 			 */
253 			tp->ack.pingpong = 0;
254 			tp->ack.ato = TCP_ATO_MIN;
255 		}
256 		tcp_send_ack(sk);
257 		NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
258 	}
259 	TCP_CHECK_TIMER(sk);
260 
261 out:
262 	if (tcp_memory_pressure)
263 		sk_stream_mem_reclaim(sk);
264 out_unlock:
265 	bh_unlock_sock(sk);
266 	sock_put(sk);
267 }
268 
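/* Zero window probe timer.  Either sends another window probe or, for
 * orphaned or resource-starved sockets, gives up and aborts the connection.
 */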
269 static void tcp_probe_timer(struct sock *sk)
270 {
271 	struct tcp_sock *tp = tcp_sk(sk);
272 	int max_probes;
273 
274 	if (tp->packets_out || !sk->sk_send_head) {
275 		tp->probes_out = 0;
276 		return;
277 	}
278 
279 	/* *WARNING* RFC 1122 forbids this
280 	 *
281 	 * It doesn't AFAIK, because we kill the retransmit timer -AK
282 	 *
283 	 * FIXME: We ought not to do it; Solaris 2.5 actually lists fixing
284 	 * this behaviour in Solaris as a bug fix. [AC]
285 	 *
286 	 * Let me explain. probes_out is zeroed by incoming ACKs, even if
287 	 * they advertise a zero window. Hence, the connection is killed only
288 	 * if we received no ACKs for the normal connection timeout. It is not
289 	 * killed merely because the window stays zero for some time; the window
290 	 * may stay zero until armageddon and even later. We are in full
291 	 * accordance with the RFCs; it is just that the probe timer combines
292 	 * both the retransmission timeout and the probe timeout in one bottle.	--ANK
293 	 */
294 	max_probes = sysctl_tcp_retries2;
295 
296 	if (sock_flag(sk, SOCK_DEAD)) {
297 		int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
298 
299 		max_probes = tcp_orphan_retries(sk, alive);
300 
301 		if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
302 			return;
303 	}
304 
305 	if (tp->probes_out > max_probes) {
306 		tcp_write_err(sk);
307 	} else {
308 		/* Only send another probe if we didn't close things up. */
309 		tcp_send_probe0(sk);
310 	}
311 }
312 
313 /*
314  *	The TCP retransmit timer.
315  */
316 
317 static void tcp_retransmit_timer(struct sock *sk)
318 {
319 	struct tcp_sock *tp = tcp_sk(sk);
320 
321 	if (!tp->packets_out)
322 		goto out;
323 
324 	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
325 
326 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
327 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
328 		/* The receiver dastardly shrank its window. Our retransmits
329 		 * become zero window probes, but we should not time out this
330 		 * connection. If the socket is an orphan, though, time it out;
331 		 * we cannot allow such beasts to hang around infinitely.
332 		 */
333 #ifdef TCP_DEBUG
334 		if (net_ratelimit()) {
335 			struct inet_sock *inet = inet_sk(sk);
336 			printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
337 			       NIPQUAD(inet->daddr), ntohs(inet->dport),
338 			       inet->num, tp->snd_una, tp->snd_nxt);
339 		}
340 #endif
341 		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
342 			tcp_write_err(sk);
343 			goto out;
344 		}
345 		tcp_enter_loss(sk, 0);
346 		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
347 		__sk_dst_reset(sk);
348 		goto out_reset_timer;
349 	}
350 
351 	if (tcp_write_timeout(sk))
352 		goto out;
353 
354 	if (tp->retransmits == 0) {
355 		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
356 			if (tp->rx_opt.sack_ok) {
357 				if (tp->ca_state == TCP_CA_Recovery)
358 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
359 				else
360 					NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
361 			} else {
362 				if (tp->ca_state == TCP_CA_Recovery)
363 					NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
364 				else
365 					NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
366 			}
367 		} else if (tp->ca_state == TCP_CA_Loss) {
368 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
369 		} else {
370 			NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
371 		}
372 	}
373 
374 	if (tcp_use_frto(sk)) {
375 		tcp_enter_frto(sk);
376 	} else {
377 		tcp_enter_loss(sk, 0);
378 	}
379 
380 	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
381 		/* Retransmission failed because of local congestion,
382 		 * do not backoff.
383 		 */
384 		if (!tp->retransmits)
385 			tp->retransmits=1;
386 		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
387 				     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
388 		goto out;
389 	}
390 
391 	/* Increase the timeout each time we retransmit.  Note that
392 	 * we do not increase the rtt estimate.  rto is initialized
393 	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
394 	 * that doubling rto each time is the least we can get away with.
395 	 * In KA9Q, Karn uses this for the first few times, and then
396 	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
397 	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
398 	 * defined in the protocol as the maximum possible RTT.  I guess
399 	 * we'll have to use something other than TCP to talk to the
400 	 * University of Mars.
401 	 *
402 	 * PAWS allows us longer timeouts and large windows, so once
403 	 * implemented ftp to mars will work nicely. We will have to fix
404 	 * the 120 second clamps though!
405 	 */
406 	tp->backoff++;
407 	tp->retransmits++;
408 
409 out_reset_timer:
410 	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
411 	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
412 	if (tp->retransmits > sysctl_tcp_retries1)
413 		__sk_dst_reset(sk);
414 
415 out:;
416 }
417 
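/* Common handler behind the retransmit timer.  Defers if the socket is
 * owned by the user, otherwise dispatches the pending event to
 * tcp_retransmit_timer() or tcp_probe_timer().
 */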
418 static void tcp_write_timer(unsigned long data)
419 {
420 	struct sock *sk = (struct sock*)data;
421 	struct tcp_sock *tp = tcp_sk(sk);
422 	int event;
423 
424 	bh_lock_sock(sk);
425 	if (sock_owned_by_user(sk)) {
426 		/* Try again later */
427 		sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20));
428 		goto out_unlock;
429 	}
430 
431 	if (sk->sk_state == TCP_CLOSE || !tp->pending)
432 		goto out;
433 
434 	if (time_after(tp->timeout, jiffies)) {
435 		sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
436 		goto out;
437 	}
438 
439 	event = tp->pending;
440 	tp->pending = 0;
441 
442 	switch (event) {
443 	case TCP_TIME_RETRANS:
444 		tcp_retransmit_timer(sk);
445 		break;
446 	case TCP_TIME_PROBE0:
447 		tcp_probe_timer(sk);
448 		break;
449 	}
450 	TCP_CHECK_TIMER(sk);
451 
452 out:
453 	sk_stream_mem_reclaim(sk);
454 out_unlock:
455 	bh_unlock_sock(sk);
456 	sock_put(sk);
457 }
458 
459 /*
460  *	Timer for listening sockets
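 *	Retransmits unanswered SYN-ACKs and prunes connection requests that
 *	have exhausted their retries from the SYN queue.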
461  */
462 
463 static void tcp_synack_timer(struct sock *sk)
464 {
465 	struct tcp_sock *tp = tcp_sk(sk);
466 	struct listen_sock *lopt = tp->accept_queue.listen_opt;
467 	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
468 	int thresh = max_retries;
469 	unsigned long now = jiffies;
470 	struct request_sock **reqp, *req;
471 	int i, budget;
472 
473 	if (lopt == NULL || lopt->qlen == 0)
474 		return;
475 
476 	/* Normally all the openreqs are young and become mature
477 	 * (i.e. converted to an established socket) within the first timeout.
478 	 * If a synack was not acknowledged for 3 seconds, it means
479 	 * one of the following things: the synack was lost, the ack was lost,
480 	 * the rtt is high, or nobody planned to ack (i.e. a synflood).
481 	 * When the server is a bit loaded, the queue is populated with old
482 	 * open requests, reducing the effective size of the queue.
483 	 * When the server is heavily loaded, the queue size reduces to zero
484 	 * after several minutes of work. That is not a synflood,
485 	 * it is normal operation. The solution is to prune entries
486 	 * that are too old, overriding the normal timeout, when the
487 	 * situation becomes dangerous.
488 	 *
489 	 * Essentially, we reserve half of the room for young
490 	 * embryos, and abort old ones without pity if the old
491 	 * ones are about to clog our table.
492 	 */
493 	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
494 		int young = (lopt->qlen_young<<1);
495 
496 		while (thresh > 2) {
497 			if (lopt->qlen < young)
498 				break;
499 			thresh--;
500 			young <<= 1;
501 		}
502 	}
503 
504 	if (tp->defer_accept)
505 		max_retries = tp->defer_accept;
506 
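	/* Walk only a bounded number of hash buckets per run; the clock hand
	 * below resumes where the previous run stopped, so the whole table is
	 * covered roughly twice per TCP_TIMEOUT_INIT. */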
507 	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
508 	i = lopt->clock_hand;
509 
510 	do {
511 		reqp=&lopt->syn_table[i];
512 		while ((req = *reqp) != NULL) {
513 			if (time_after_eq(now, req->expires)) {
514 				if ((req->retrans < thresh ||
515 				     (inet_rsk(req)->acked && req->retrans < max_retries))
516 				    && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
517 					unsigned long timeo;
518 
519 					if (req->retrans++ == 0)
520 						lopt->qlen_young--;
521 					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
522 						    TCP_RTO_MAX);
523 					req->expires = now + timeo;
524 					reqp = &req->dl_next;
525 					continue;
526 				}
527 
528 				/* Drop this request */
529 				tcp_synq_unlink(tp, req, reqp);
530 				reqsk_queue_removed(&tp->accept_queue, req);
531 				reqsk_free(req);
532 				continue;
533 			}
534 			reqp = &req->dl_next;
535 		}
536 
537 		i = (i+1)&(TCP_SYNQ_HSIZE-1);
538 
539 	} while (--budget > 0);
540 
541 	lopt->clock_hand = i;
542 
543 	if (lopt->qlen)
544 		tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
545 }
546 
547 void tcp_delete_keepalive_timer (struct sock *sk)
548 {
549 	sk_stop_timer(sk, &sk->sk_timer);
550 }
551 
552 void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len)
553 {
554 	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
555 }
556 
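/* Called when SO_KEEPALIVE is toggled on a socket: arm the keepalive timer
 * when the option is switched on and remove it when it is switched off.
 * Sockets in CLOSE or LISTEN state are left alone, since sk_timer has a
 * different role there.
 */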
557 void tcp_set_keepalive(struct sock *sk, int val)
558 {
559 	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
560 		return;
561 
562 	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
563 		tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
564 	else if (!val)
565 		tcp_delete_keepalive_timer(sk);
566 }
567 
568 
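/* Handler for sk_timer, which is shared by three roles: the SYN-ACK
 * retransmit timer for listening sockets, the FIN_WAIT2 timer for orphaned
 * sockets, and the keepalive probe timer proper.
 */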
569 static void tcp_keepalive_timer (unsigned long data)
570 {
571 	struct sock *sk = (struct sock *) data;
572 	struct tcp_sock *tp = tcp_sk(sk);
573 	__u32 elapsed;
574 
575 	/* Only process if socket is not in use. */
576 	bh_lock_sock(sk);
577 	if (sock_owned_by_user(sk)) {
578 		/* Try again later. */
579 		tcp_reset_keepalive_timer (sk, HZ/20);
580 		goto out;
581 	}
582 
583 	if (sk->sk_state == TCP_LISTEN) {
584 		tcp_synack_timer(sk);
585 		goto out;
586 	}
587 
588 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
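		/* Orphaned FIN_WAIT2 socket: if tcp_fin_time() leaves time
		 * beyond the TIME_WAIT period, let the timewait machinery
		 * handle the remainder; otherwise reset the connection. */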
589 		if (tp->linger2 >= 0) {
590 			int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;
591 
592 			if (tmo > 0) {
593 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
594 				goto out;
595 			}
596 		}
597 		tcp_send_active_reset(sk, GFP_ATOMIC);
598 		goto death;
599 	}
600 
601 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
602 		goto out;
603 
604 	elapsed = keepalive_time_when(tp);
605 
606 	/* It is alive without keepalive 8) */
607 	if (tp->packets_out || sk->sk_send_head)
608 		goto resched;
609 
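	/* Idle time since we last heard from the peer. */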
610 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
611 
612 	if (elapsed >= keepalive_time_when(tp)) {
613 		if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
614 		     (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
615 			tcp_send_active_reset(sk, GFP_ATOMIC);
616 			tcp_write_err(sk);
617 			goto out;
618 		}
619 		if (tcp_write_wakeup(sk) <= 0) {
620 			tp->probes_out++;
621 			elapsed = keepalive_intvl_when(tp);
622 		} else {
623 			/* If keepalive was lost due to local congestion,
624 			 * try harder.
625 			 */
626 			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
627 		}
628 	} else {
629 		/* The timer will next fire at tp->rcv_tstamp + keepalive_time_when(tp). */
630 		elapsed = keepalive_time_when(tp) - elapsed;
631 	}
632 
633 	TCP_CHECK_TIMER(sk);
634 	sk_stream_mem_reclaim(sk);
635 
636 resched:
637 	tcp_reset_keepalive_timer (sk, elapsed);
638 	goto out;
639 
640 death:
641 	tcp_done(sk);
642 
643 out:
644 	bh_unlock_sock(sk);
645 	sock_put(sk);
646 }
647 
648 EXPORT_SYMBOL(tcp_clear_xmit_timers);
649 EXPORT_SYMBOL(tcp_delete_keepalive_timer);
650 EXPORT_SYMBOL(tcp_init_xmit_timers);
651 EXPORT_SYMBOL(tcp_reset_keepalive_timer);
652