xref: /linux/net/ipv4/tcp_ipv4.c (revision 6e8331ac6973435b1e7604c30f2ad394035b46e1)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9  *
10  *		IPv4 specific functions
11  *
12  *
13  *		code split from:
14  *		linux/ipv4/tcp.c
15  *		linux/ipv4/tcp_input.c
16  *		linux/ipv4/tcp_output.c
17  *
18  *		See tcp.c for author information
19  *
20  *	This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  */
25 
26 /*
27  * Changes:
28  *		David S. Miller	:	New socket lookup architecture.
29  *					This code is dedicated to John Dyson.
30  *		David S. Miller :	Change semantics of established hash,
31  *					half is devoted to TIME_WAIT sockets
32  *					and the rest go in the other half.
33  *		Andi Kleen :		Add support for syncookies and fixed
34  *					some bugs: ip options weren't passed to
35  *					the TCP layer, missed a check for an
36  *					ACK bit.
37  *		Andi Kleen :		Implemented fast path mtu discovery.
38  *	     				Fixed many serious bugs in the
39  *					request_sock handling and moved
40  *					most of it into the af independent code.
41  *					Added tail drop and some other bugfixes.
42  *					Added new listen semantics.
43  *		Mike McLagan	:	Routing by source
44  *	Juan Jose Ciarlante:		ip_dynaddr bits
45  *		Andi Kleen:		various fixes.
46  *	Vitaly E. Lavrov	:	Transparent proxy revived after a year
47  *					in a coma.
48  *	Andi Kleen		:	Fix new listen.
49  *	Andi Kleen		:	Fix accept error reporting.
50  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
51  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
52  *					a single port at the same time.
53  */
54 
55 
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 
65 #include <net/icmp.h>
66 #include <net/inet_hashtables.h>
67 #include <net/tcp.h>
68 #include <net/transp_v6.h>
69 #include <net/ipv6.h>
70 #include <net/inet_common.h>
71 #include <net/timewait_sock.h>
72 #include <net/xfrm.h>
73 #include <net/netdma.h>
74 
75 #include <linux/inet.h>
76 #include <linux/ipv6.h>
77 #include <linux/stddef.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 
81 int sysctl_tcp_tw_reuse;
82 int sysctl_tcp_low_latency;
83 
84 /* Check TCP sequence numbers in ICMP packets. */
85 #define ICMP_MIN_LENGTH 8
86 
87 /* Socket used for sending RSTs */
88 static struct socket *tcp_socket;
89 
90 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
91 
92 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 	.lhash_lock	= __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
94 	.lhash_users	= ATOMIC_INIT(0),
95 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
96 };
97 
98 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
99 {
100 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
101 				 inet_csk_bind_conflict);
102 }
103 
104 static void tcp_v4_hash(struct sock *sk)
105 {
106 	inet_hash(&tcp_hashinfo, sk);
107 }
108 
109 void tcp_unhash(struct sock *sk)
110 {
111 	inet_unhash(&tcp_hashinfo, sk);
112 }
113 
114 static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
115 {
116 	return secure_tcp_sequence_number(skb->nh.iph->daddr,
117 					  skb->nh.iph->saddr,
118 					  skb->h.th->dest,
119 					  skb->h.th->source);
120 }
121 
122 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
123 {
124 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
125 	struct tcp_sock *tp = tcp_sk(sk);
126 
127 	/* With PAWS, it is safe from the viewpoint
128 	   of data integrity. Even without PAWS it is safe provided sequence
129 	   spaces do not overlap, i.e. at data rates <= 80 Mbit/sec.
130 
131 	   Actually, the idea is close to VJ's: only the timestamp cache is
132 	   held not per host but per port pair, and the TW bucket is used as
133 	   the state holder.
134 
135 	   If the TW bucket has already been destroyed we fall back to VJ's
136 	   scheme and use the initial timestamp retrieved from the peer table.
137 	 */
138 	if (tcptw->tw_ts_recent_stamp &&
139 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
140 			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
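		/* Pick the new write_seq just above anything the old
		 * incarnation could still have had in flight: tw_snd_nxt plus
		 * the maximum window size, plus a little slack.
		 */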
141 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
142 		if (tp->write_seq == 0)
143 			tp->write_seq = 1;
144 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
145 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
146 		sock_hold(sktw);
147 		return 1;
148 	}
149 
150 	return 0;
151 }
152 
153 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
154 
155 /* This will initiate an outgoing connection. */
156 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
157 {
158 	struct inet_sock *inet = inet_sk(sk);
159 	struct tcp_sock *tp = tcp_sk(sk);
160 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
161 	struct rtable *rt;
162 	u32 daddr, nexthop;
163 	int tmp;
164 	int err;
165 
166 	if (addr_len < sizeof(struct sockaddr_in))
167 		return -EINVAL;
168 
169 	if (usin->sin_family != AF_INET)
170 		return -EAFNOSUPPORT;
171 
172 	nexthop = daddr = usin->sin_addr.s_addr;
173 	if (inet->opt && inet->opt->srr) {
174 		if (!daddr)
175 			return -EINVAL;
176 		nexthop = inet->opt->faddr;
177 	}
178 
179 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
180 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
181 			       IPPROTO_TCP,
182 			       inet->sport, usin->sin_port, sk);
183 	if (tmp < 0)
184 		return tmp;
185 
186 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
187 		ip_rt_put(rt);
188 		return -ENETUNREACH;
189 	}
190 
191 	if (!inet->opt || !inet->opt->srr)
192 		daddr = rt->rt_dst;
193 
194 	if (!inet->saddr)
195 		inet->saddr = rt->rt_src;
196 	inet->rcv_saddr = inet->saddr;
197 
198 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
199 		/* Reset inherited state */
200 		tp->rx_opt.ts_recent	   = 0;
201 		tp->rx_opt.ts_recent_stamp = 0;
202 		tp->write_seq		   = 0;
203 	}
204 
205 	if (tcp_death_row.sysctl_tw_recycle &&
206 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
207 		struct inet_peer *peer = rt_get_peer(rt);
208 
209 		/* VJ's idea. We save the last timestamp seen from
210 		 * the destination in the peer table when entering TIME-WAIT state,
211 		 * and initialize rx_opt.ts_recent from it when trying a new connection.
212 		 */
213 
214 		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
215 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
216 			tp->rx_opt.ts_recent = peer->tcp_ts;
217 		}
218 	}
219 
220 	inet->dport = usin->sin_port;
221 	inet->daddr = daddr;
222 
223 	inet_csk(sk)->icsk_ext_hdr_len = 0;
224 	if (inet->opt)
225 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
226 
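	/* Conservative default MSS: the 576-byte minimum reassembly buffer
	 * minus 40 bytes of IPv4 and TCP headers (RFC 1122).
	 */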
227 	tp->rx_opt.mss_clamp = 536;
228 
229 	/* Socket identity is still unknown (sport may be zero).
230 	 * However we set the state to SYN-SENT and, without releasing the
231 	 * socket lock, select a source port, enter ourselves into the hash
232 	 * tables and complete initialization after this.
233 	 */
234 	tcp_set_state(sk, TCP_SYN_SENT);
235 	err = inet_hash_connect(&tcp_death_row, sk);
236 	if (err)
237 		goto failure;
238 
239 	err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
240 	if (err)
241 		goto failure;
242 
243 	/* OK, now commit destination to socket.  */
244 	sk->sk_gso_type = SKB_GSO_TCPV4;
245 	sk_setup_caps(sk, &rt->u.dst);
246 
247 	if (!tp->write_seq)
248 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
249 							   inet->daddr,
250 							   inet->sport,
251 							   usin->sin_port);
252 
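	/* Seed the per-socket IP ID counter with a value that is hard for
	 * an off-path observer to predict.
	 */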
253 	inet->id = tp->write_seq ^ jiffies;
254 
255 	err = tcp_connect(sk);
256 	rt = NULL;
257 	if (err)
258 		goto failure;
259 
260 	return 0;
261 
262 failure:
263 	/* This unhashes the socket and releases the local port, if necessary. */
264 	tcp_set_state(sk, TCP_CLOSE);
265 	ip_rt_put(rt);
266 	sk->sk_route_caps = 0;
267 	inet->dport = 0;
268 	return err;
269 }
270 
271 /*
272  * This routine does path mtu discovery as defined in RFC1191.
273  */
274 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
275 {
276 	struct dst_entry *dst;
277 	struct inet_sock *inet = inet_sk(sk);
278 
279 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
280 	 * sent out by Linux are always < 576 bytes, so they should go through
281 	 * unfragmented).
282 	 */
283 	if (sk->sk_state == TCP_LISTEN)
284 		return;
285 
286 	/* We don't check in the dst entry if pmtu discovery is forbidden
287 	 * on this route. We just assume that no packet-too-big packets
288 	 * are sent back when pmtu discovery is not active.
289 	 * There is a small race when the user changes this flag in the
290 	 * route, but I think that's acceptable.
291 	 */
292 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
293 		return;
294 
295 	dst->ops->update_pmtu(dst, mtu);
296 
297 	/* Something is about to go wrong... Remember the soft error
298 	 * in case this connection is not able to recover.
299 	 */
300 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
301 		sk->sk_err_soft = EMSGSIZE;
302 
303 	mtu = dst_mtu(dst);
304 
305 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
306 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
307 		tcp_sync_mss(sk, mtu);
308 
309 		/* Resend the TCP packet because it's
310 		 * clear that the old packet has been
311 		 * dropped. This is the new "fast" path mtu
312 		 * discovery.
313 		 */
314 		tcp_simple_retransmit(sk);
315 	} /* else let the usual retransmit timer handle it */
316 }
317 
318 /*
319  * This routine is called by the ICMP module when it gets some
320  * sort of error condition.  If err < 0 then the socket should
321  * be closed and the error returned to the user.  If err > 0
322  * it's just the icmp type << 8 | icmp code.  After adjustment
323  * header points to the first 8 bytes of the tcp header.  We need
324  * to find the appropriate port.
325  *
326  * The locking strategy used here is very "optimistic". When
327  * someone else accesses the socket, the ICMP is just dropped,
328  * and for some paths there is no check at all.
329  * A more general error queue, queueing errors for later handling,
330  * would probably be better.
331  *
332  */
333 
334 void tcp_v4_err(struct sk_buff *skb, u32 info)
335 {
336 	struct iphdr *iph = (struct iphdr *)skb->data;
337 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
338 	struct tcp_sock *tp;
339 	struct inet_sock *inet;
340 	int type = skb->h.icmph->type;
341 	int code = skb->h.icmph->code;
342 	struct sock *sk;
343 	__u32 seq;
344 	int err;
345 
346 	if (skb->len < (iph->ihl << 2) + 8) {
347 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
348 		return;
349 	}
350 
351 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
352 			 th->source, inet_iif(skb));
353 	if (!sk) {
354 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
355 		return;
356 	}
357 	if (sk->sk_state == TCP_TIME_WAIT) {
358 		inet_twsk_put((struct inet_timewait_sock *)sk);
359 		return;
360 	}
361 
362 	bh_lock_sock(sk);
363 	/* If too many ICMPs get dropped on busy
364 	 * servers this needs to be solved differently.
365 	 */
366 	if (sock_owned_by_user(sk))
367 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
368 
369 	if (sk->sk_state == TCP_CLOSE)
370 		goto out;
371 
372 	tp = tcp_sk(sk);
373 	seq = ntohl(th->seq);
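	/* Ignore ICMP errors that do not refer to data currently in flight,
	 * i.e. sequence numbers outside the snd_una..snd_nxt window.
	 */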
374 	if (sk->sk_state != TCP_LISTEN &&
375 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
376 		NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
377 		goto out;
378 	}
379 
380 	switch (type) {
381 	case ICMP_SOURCE_QUENCH:
382 		/* Just silently ignore these. */
383 		goto out;
384 	case ICMP_PARAMETERPROB:
385 		err = EPROTO;
386 		break;
387 	case ICMP_DEST_UNREACH:
388 		if (code > NR_ICMP_UNREACH)
389 			goto out;
390 
391 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
392 			if (!sock_owned_by_user(sk))
393 				do_pmtu_discovery(sk, iph, info);
394 			goto out;
395 		}
396 
397 		err = icmp_err_convert[code].errno;
398 		break;
399 	case ICMP_TIME_EXCEEDED:
400 		err = EHOSTUNREACH;
401 		break;
402 	default:
403 		goto out;
404 	}
405 
406 	switch (sk->sk_state) {
407 		struct request_sock *req, **prev;
408 	case TCP_LISTEN:
409 		if (sock_owned_by_user(sk))
410 			goto out;
411 
412 		req = inet_csk_search_req(sk, &prev, th->dest,
413 					  iph->daddr, iph->saddr);
414 		if (!req)
415 			goto out;
416 
417 		/* ICMPs are not backlogged, hence we cannot get
418 		   an established socket here.
419 		 */
420 		BUG_TRAP(!req->sk);
421 
422 		if (seq != tcp_rsk(req)->snt_isn) {
423 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
424 			goto out;
425 		}
426 
427 		/*
428 		 * Still in SYN_RECV, just remove it silently.
429 		 * There is no good way to pass the error to the newly
430 		 * created socket, and POSIX does not want network
431 		 * errors returned from accept().
432 		 */
433 		inet_csk_reqsk_queue_drop(sk, req, prev);
434 		goto out;
435 
436 	case TCP_SYN_SENT:
437 	case TCP_SYN_RECV:  /* Cannot happen normally.
438 			       It can, for example, if SYNs crossed.
439 			     */
440 		if (!sock_owned_by_user(sk)) {
441 			sk->sk_err = err;
442 
443 			sk->sk_error_report(sk);
444 
445 			tcp_done(sk);
446 		} else {
447 			sk->sk_err_soft = err;
448 		}
449 		goto out;
450 	}
451 
452 	/* If we've already connected we will keep trying
453 	 * until we time out, or the user gives up.
454 	 *
455 	 * rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH to be
456 	 * considered hard errors (well, FRAG_FAILED too,
457 	 * but it is obsoleted by pmtu discovery).
458 	 *
459 	 * Note that in the modern internet, where routing is unreliable
460 	 * and broken firewalls sit in every dark corner, sending random
461 	 * errors ordered by their masters, even these two messages finally
462 	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
463 	 *
464 	 * Now we are in compliance with RFCs.
465 	 *							--ANK (980905)
466 	 */
467 
468 	inet = inet_sk(sk);
469 	if (!sock_owned_by_user(sk) && inet->recverr) {
470 		sk->sk_err = err;
471 		sk->sk_error_report(sk);
472 	} else	{ /* Only an error on timeout */
473 		sk->sk_err_soft = err;
474 	}
475 
476 out:
477 	bh_unlock_sock(sk);
478 	sock_put(sk);
479 }
480 
481 /* This routine computes an IPv4 TCP checksum. */
482 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
483 {
484 	struct inet_sock *inet = inet_sk(sk);
485 	struct tcphdr *th = skb->h.th;
486 
487 	if (skb->ip_summed == CHECKSUM_HW) {
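		/* Hardware will finish the checksum: seed the check field with
		 * the folded pseudo-header sum and note where the field sits.
		 */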
488 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
489 		skb->csum = offsetof(struct tcphdr, check);
490 	} else {
491 		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
492 					 csum_partial((char *)th,
493 						      th->doff << 2,
494 						      skb->csum));
495 	}
496 }
497 
498 int tcp_v4_gso_send_check(struct sk_buff *skb)
499 {
500 	struct iphdr *iph;
501 	struct tcphdr *th;
502 
503 	if (!pskb_may_pull(skb, sizeof(*th)))
504 		return -EINVAL;
505 
506 	iph = skb->nh.iph;
507 	th = skb->h.th;
508 
509 	th->check = 0;
510 	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
511 	skb->csum = offsetof(struct tcphdr, check);
512 	skb->ip_summed = CHECKSUM_HW;
513 	return 0;
514 }
515 
516 /*
517  *	This routine will send an RST to the other tcp.
518  *
519  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
520  *		      for the reset?
521  *	Answer: if a packet caused the RST, it is not for a socket
522  *		existing in our system; if it is matched to a socket,
523  *		it is just a duplicate segment or a bug in the other side's TCP.
524  *		So we build the reply based only on the parameters
525  *		that arrived with the segment.
526  *	Exception: precedence violation. We do not implement it in any case.
527  */
528 
529 static void tcp_v4_send_reset(struct sk_buff *skb)
530 {
531 	struct tcphdr *th = skb->h.th;
532 	struct tcphdr rth;
533 	struct ip_reply_arg arg;
534 
535 	/* Never send a reset in response to a reset. */
536 	if (th->rst)
537 		return;
538 
539 	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
540 		return;
541 
542 	/* Swap the send and the receive. */
543 	memset(&rth, 0, sizeof(struct tcphdr));
544 	rth.dest   = th->source;
545 	rth.source = th->dest;
546 	rth.doff   = sizeof(struct tcphdr) / 4;
547 	rth.rst    = 1;
548 
549 	if (th->ack) {
550 		rth.seq = th->ack_seq;
551 	} else {
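		/* The offending segment carried no ACK: per the RFC 793 reset
		 * rules, ACK everything it occupied in sequence space
		 * (payload length plus one each for SYN and FIN).
		 */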
552 		rth.ack = 1;
553 		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
554 				    skb->len - (th->doff << 2));
555 	}
556 
557 	memset(&arg, 0, sizeof arg);
558 	arg.iov[0].iov_base = (unsigned char *)&rth;
559 	arg.iov[0].iov_len  = sizeof rth;
560 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
561 				      skb->nh.iph->saddr, /*XXX*/
562 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
563 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
564 
565 	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
566 
567 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
568 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
569 }
570 
571 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
572    outside socket context, is certainly ugly. What can I do?
573  */
574 
575 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
576 			    u32 win, u32 ts)
577 {
578 	struct tcphdr *th = skb->h.th;
579 	struct {
580 		struct tcphdr th;
581 		u32 tsopt[3];
582 	} rep;
583 	struct ip_reply_arg arg;
584 
585 	memset(&rep.th, 0, sizeof(struct tcphdr));
586 	memset(&arg, 0, sizeof arg);
587 
588 	arg.iov[0].iov_base = (unsigned char *)&rep;
589 	arg.iov[0].iov_len  = sizeof(rep.th);
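	/* When we have a timestamp to echo, append a (NOP, NOP, TIMESTAMP)
	 * option so the peer's PAWS checks keep working.
	 */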
590 	if (ts) {
591 		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
592 				     (TCPOPT_TIMESTAMP << 8) |
593 				     TCPOLEN_TIMESTAMP);
594 		rep.tsopt[1] = htonl(tcp_time_stamp);
595 		rep.tsopt[2] = htonl(ts);
596 		arg.iov[0].iov_len = sizeof(rep);
597 	}
598 
599 	/* Swap the send and the receive. */
600 	rep.th.dest    = th->source;
601 	rep.th.source  = th->dest;
602 	rep.th.doff    = arg.iov[0].iov_len / 4;
603 	rep.th.seq     = htonl(seq);
604 	rep.th.ack_seq = htonl(ack);
605 	rep.th.ack     = 1;
606 	rep.th.window  = htons(win);
607 
608 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
609 				      skb->nh.iph->saddr, /*XXX*/
610 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
611 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
612 
613 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
614 
615 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
616 }
617 
618 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
619 {
620 	struct inet_timewait_sock *tw = inet_twsk(sk);
621 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
622 
623 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
624 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
625 
626 	inet_twsk_put(tw);
627 }
628 
629 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
630 {
631 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
632 			req->ts_recent);
633 }
634 
635 /*
636  *	Send a SYN-ACK after having received an ACK.
637  *	This still operates on a request_sock only, not on a big
638  *	socket.
639  */
640 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
641 			      struct dst_entry *dst)
642 {
643 	const struct inet_request_sock *ireq = inet_rsk(req);
644 	int err = -1;
645 	struct sk_buff * skb;
646 
647 	/* First, grab a route. */
648 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
649 		goto out;
650 
651 	skb = tcp_make_synack(sk, dst, req);
652 
653 	if (skb) {
654 		struct tcphdr *th = skb->h.th;
655 
656 		th->check = tcp_v4_check(th, skb->len,
657 					 ireq->loc_addr,
658 					 ireq->rmt_addr,
659 					 csum_partial((char *)th, skb->len,
660 						      skb->csum));
661 
662 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
663 					    ireq->rmt_addr,
664 					    ireq->opt);
665 		if (err == NET_XMIT_CN)
666 			err = 0;
667 	}
668 
669 out:
670 	dst_release(dst);
671 	return err;
672 }
673 
674 /*
675  *	IPv4 request_sock destructor.
676  */
677 static void tcp_v4_reqsk_destructor(struct request_sock *req)
678 {
679 	kfree(inet_rsk(req)->opt);
680 }
681 
682 #ifdef CONFIG_SYN_COOKIES
683 static void syn_flood_warning(struct sk_buff *skb)
684 {
685 	static unsigned long warntime;
686 
687 	if (time_after(jiffies, (warntime + HZ * 60))) {
688 		warntime = jiffies;
689 		printk(KERN_INFO
690 		       "possible SYN flooding on port %d. Sending cookies.\n",
691 		       ntohs(skb->h.th->dest));
692 	}
693 }
694 #endif
695 
696 /*
697  * Save and compile IPv4 options into the request_sock if needed.
698  */
699 static struct ip_options *tcp_v4_save_options(struct sock *sk,
700 					      struct sk_buff *skb)
701 {
702 	struct ip_options *opt = &(IPCB(skb)->opt);
703 	struct ip_options *dopt = NULL;
704 
705 	if (opt && opt->optlen) {
706 		int opt_size = optlength(opt);
707 		dopt = kmalloc(opt_size, GFP_ATOMIC);
708 		if (dopt) {
709 			if (ip_options_echo(dopt, skb)) {
710 				kfree(dopt);
711 				dopt = NULL;
712 			}
713 		}
714 	}
715 	return dopt;
716 }
717 
718 struct request_sock_ops tcp_request_sock_ops = {
719 	.family		=	PF_INET,
720 	.obj_size	=	sizeof(struct tcp_request_sock),
721 	.rtx_syn_ack	=	tcp_v4_send_synack,
722 	.send_ack	=	tcp_v4_reqsk_send_ack,
723 	.destructor	=	tcp_v4_reqsk_destructor,
724 	.send_reset	=	tcp_v4_send_reset,
725 };
726 
727 static struct timewait_sock_ops tcp_timewait_sock_ops = {
728 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
729 	.twsk_unique	= tcp_twsk_unique,
730 };
731 
732 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
733 {
734 	struct inet_request_sock *ireq;
735 	struct tcp_options_received tmp_opt;
736 	struct request_sock *req;
737 	__u32 saddr = skb->nh.iph->saddr;
738 	__u32 daddr = skb->nh.iph->daddr;
739 	__u32 isn = TCP_SKB_CB(skb)->when;
740 	struct dst_entry *dst = NULL;
741 #ifdef CONFIG_SYN_COOKIES
742 	int want_cookie = 0;
743 #else
744 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
745 #endif
746 
747 	/* Never answer SYNs sent to broadcast or multicast addresses */
748 	if (((struct rtable *)skb->dst)->rt_flags &
749 	    (RTCF_BROADCAST | RTCF_MULTICAST))
750 		goto drop;
751 
752 	/* TW buckets are converted to open requests without
753 	 * limitation; they conserve resources and the peer is
754 	 * evidently a real one.
755 	 */
756 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
757 #ifdef CONFIG_SYN_COOKIES
758 		if (sysctl_tcp_syncookies) {
759 			want_cookie = 1;
760 		} else
761 #endif
762 		goto drop;
763 	}
764 
765 	/* Accept backlog is full. If we have already queued enough
766 	 * warm entries in the syn queue, drop this request. It is better than
767 	 * clogging the syn queue with openreqs with exponentially increasing
768 	 * timeout.
769 	 */
770 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
771 		goto drop;
772 
773 	req = reqsk_alloc(&tcp_request_sock_ops);
774 	if (!req)
775 		goto drop;
776 
777 	tcp_clear_options(&tmp_opt);
778 	tmp_opt.mss_clamp = 536;
779 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
780 
781 	tcp_parse_options(skb, &tmp_opt, 0);
782 
783 	if (want_cookie) {
784 		tcp_clear_options(&tmp_opt);
785 		tmp_opt.saw_tstamp = 0;
786 	}
787 
788 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
789 		/* Some OSes (unknown ones, but I see them on web servers, which
790 		 * contain information interesting only to Windows
791 		 * users) do not send their timestamp in the SYN. It is an easy
792 		 * case: we simply do not advertise TS support.
793 		 */
794 		tmp_opt.saw_tstamp = 0;
795 		tmp_opt.tstamp_ok  = 0;
796 	}
797 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
798 
799 	tcp_openreq_init(req, &tmp_opt, skb);
800 
801 	ireq = inet_rsk(req);
802 	ireq->loc_addr = daddr;
803 	ireq->rmt_addr = saddr;
804 	ireq->opt = tcp_v4_save_options(sk, skb);
805 	if (!want_cookie)
806 		TCP_ECN_create_request(req, skb->h.th);
807 
808 	if (want_cookie) {
809 #ifdef CONFIG_SYN_COOKIES
810 		syn_flood_warning(skb);
811 #endif
812 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
813 	} else if (!isn) {
814 		struct inet_peer *peer = NULL;
815 
816 		/* VJ's idea. We save the last timestamp seen
817 		 * from the destination in the peer table when entering
818 		 * TIME-WAIT state, and check against it before
819 		 * accepting a new connection request.
820 		 *
821 		 * If "isn" is not zero, this request hit an alive
822 		 * timewait bucket, so all the necessary checks
823 		 * were already made by the function processing the timewait state.
824 		 */
825 		if (tmp_opt.saw_tstamp &&
826 		    tcp_death_row.sysctl_tw_recycle &&
827 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
828 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
829 		    peer->v4daddr == saddr) {
830 			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
831 			    (s32)(peer->tcp_ts - req->ts_recent) >
832 							TCP_PAWS_WINDOW) {
833 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
834 				dst_release(dst);
835 				goto drop_and_free;
836 			}
837 		}
838 		/* Kill the following clause if you dislike this approach. */
839 		else if (!sysctl_tcp_syncookies &&
840 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
841 			  (sysctl_max_syn_backlog >> 2)) &&
842 			 (!peer || !peer->tcp_ts_stamp) &&
843 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
844 			/* Without syncookies the last quarter of the
845 			 * backlog is filled only with destinations
846 			 * proven to be alive.
847 			 * It means that we continue to communicate
848 			 * with destinations already remembered
849 			 * at the moment of the synflood.
850 			 */
851 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
852 				       "request from %u.%u.%u.%u/%u\n",
853 				       NIPQUAD(saddr),
854 				       ntohs(skb->h.th->source));
855 			dst_release(dst);
856 			goto drop_and_free;
857 		}
858 
859 		isn = tcp_v4_init_sequence(sk, skb);
860 	}
861 	tcp_rsk(req)->snt_isn = isn;
862 
863 	if (tcp_v4_send_synack(sk, req, dst))
864 		goto drop_and_free;
865 
866 	if (want_cookie) {
867 		reqsk_free(req);
868 	} else {
869 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
870 	}
871 	return 0;
872 
873 drop_and_free:
874 	reqsk_free(req);
875 drop:
876 	return 0;
877 }
878 
879 
880 /*
881  * The three way handshake has completed - we got a valid synack -
882  * The three way handshake has completed - we got a valid ACK -
883  */
884 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
885 				  struct request_sock *req,
886 				  struct dst_entry *dst)
887 {
888 	struct inet_request_sock *ireq;
889 	struct inet_sock *newinet;
890 	struct tcp_sock *newtp;
891 	struct sock *newsk;
892 
893 	if (sk_acceptq_is_full(sk))
894 		goto exit_overflow;
895 
896 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
897 		goto exit;
898 
899 	newsk = tcp_create_openreq_child(sk, req, skb);
900 	if (!newsk)
901 		goto exit;
902 
903 	newsk->sk_gso_type = SKB_GSO_TCPV4;
904 	sk_setup_caps(newsk, dst);
905 
906 	newtp		      = tcp_sk(newsk);
907 	newinet		      = inet_sk(newsk);
908 	ireq		      = inet_rsk(req);
909 	newinet->daddr	      = ireq->rmt_addr;
910 	newinet->rcv_saddr    = ireq->loc_addr;
911 	newinet->saddr	      = ireq->loc_addr;
912 	newinet->opt	      = ireq->opt;
913 	ireq->opt	      = NULL;
914 	newinet->mc_index     = inet_iif(skb);
915 	newinet->mc_ttl	      = skb->nh.iph->ttl;
916 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
917 	if (newinet->opt)
918 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
919 	newinet->id = newtp->write_seq ^ jiffies;
920 
921 	tcp_mtup_init(newsk);
922 	tcp_sync_mss(newsk, dst_mtu(dst));
923 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
924 	tcp_initialize_rcv_mss(newsk);
925 
926 	__inet_hash(&tcp_hashinfo, newsk, 0);
927 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
928 
929 	return newsk;
930 
931 exit_overflow:
932 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
933 exit:
934 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
935 	dst_release(dst);
936 	return NULL;
937 }
938 
939 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
940 {
941 	struct tcphdr *th = skb->h.th;
942 	struct iphdr *iph = skb->nh.iph;
943 	struct sock *nsk;
944 	struct request_sock **prev;
945 	/* Find possible connection requests. */
946 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
947 						       iph->saddr, iph->daddr);
948 	if (req)
949 		return tcp_check_req(sk, skb, req, prev);
950 
951 	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
952 					th->source, skb->nh.iph->daddr,
953 					ntohs(th->dest), inet_iif(skb));
954 
955 	if (nsk) {
956 		if (nsk->sk_state != TCP_TIME_WAIT) {
957 			bh_lock_sock(nsk);
958 			return nsk;
959 		}
960 		inet_twsk_put((struct inet_timewait_sock *)nsk);
961 		return NULL;
962 	}
963 
964 #ifdef CONFIG_SYN_COOKIES
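	/* A bare ACK that matches no request may still carry a valid SYN
	 * cookie; let cookie_v4_check() try to reconstruct the request.
	 */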
965 	if (!th->rst && !th->syn && th->ack)
966 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
967 #endif
968 	return sk;
969 }
970 
971 static int tcp_v4_checksum_init(struct sk_buff *skb)
972 {
973 	if (skb->ip_summed == CHECKSUM_HW) {
974 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
975 				  skb->nh.iph->daddr, skb->csum)) {
976 			skb->ip_summed = CHECKSUM_UNNECESSARY;
977 			return 0;
978 		}
979 	}
980 
981 	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
982 				       skb->len, IPPROTO_TCP, 0);
983 
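	/* Short segments are cheap to verify immediately; longer ones keep
	 * the deferred partial checksum and are completed later.
	 */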
984 	if (skb->len <= 76) {
985 		return __skb_checksum_complete(skb);
986 	}
987 	return 0;
988 }
989 
990 
991 /* The socket must have its spinlock held when we get
992  * here.
993  *
994  * We have a potential double-lock case here, so even when
995  * doing backlog processing we use the BH locking scheme.
996  * This is because we cannot sleep with the original spinlock
997  * held.
998  */
999 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1000 {
1001 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1002 		TCP_CHECK_TIMER(sk);
1003 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1004 			goto reset;
1005 		TCP_CHECK_TIMER(sk);
1006 		return 0;
1007 	}
1008 
1009 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1010 		goto csum_err;
1011 
1012 	if (sk->sk_state == TCP_LISTEN) {
1013 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1014 		if (!nsk)
1015 			goto discard;
1016 
1017 		if (nsk != sk) {
1018 			if (tcp_child_process(sk, nsk, skb))
1019 				goto reset;
1020 			return 0;
1021 		}
1022 	}
1023 
1024 	TCP_CHECK_TIMER(sk);
1025 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1026 		goto reset;
1027 	TCP_CHECK_TIMER(sk);
1028 	return 0;
1029 
1030 reset:
1031 	tcp_v4_send_reset(skb);
1032 discard:
1033 	kfree_skb(skb);
1034 	/* Be careful here. If this function gets more complicated and
1035 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1036 	 * might be destroyed here. This current version compiles correctly,
1037 	 * but you have been warned.
1038 	 */
1039 	return 0;
1040 
1041 csum_err:
1042 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1043 	goto discard;
1044 }
1045 
1046 /*
1047  *	From tcp_input.c
1048  */
1049 
1050 int tcp_v4_rcv(struct sk_buff *skb)
1051 {
1052 	struct tcphdr *th;
1053 	struct sock *sk;
1054 	int ret;
1055 
1056 	if (skb->pkt_type != PACKET_HOST)
1057 		goto discard_it;
1058 
1059 	/* Count it even if it's bad */
1060 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1061 
1062 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1063 		goto discard_it;
1064 
1065 	th = skb->h.th;
1066 
1067 	if (th->doff < sizeof(struct tcphdr) / 4)
1068 		goto bad_packet;
1069 	if (!pskb_may_pull(skb, th->doff * 4))
1070 		goto discard_it;
1071 
1072 	/* An explanation is required here, I think.
1073 	 * Packet length and doff are validated by header prediction,
1074 	 * provided the case of th->doff == 0 is eliminated.
1075 	 * So, we defer the checks. */
1076 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1077 	     tcp_v4_checksum_init(skb)))
1078 		goto bad_packet;
1079 
1080 	th = skb->h.th;
1081 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1082 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1083 				    skb->len - th->doff * 4);
1084 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1085 	TCP_SKB_CB(skb)->when	 = 0;
1086 	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
1087 	TCP_SKB_CB(skb)->sacked	 = 0;
1088 
1089 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1090 			   skb->nh.iph->daddr, ntohs(th->dest),
1091 			   inet_iif(skb));
1092 
1093 	if (!sk)
1094 		goto no_tcp_socket;
1095 
1096 process:
1097 	if (sk->sk_state == TCP_TIME_WAIT)
1098 		goto do_time_wait;
1099 
1100 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1101 		goto discard_and_relse;
1102 	nf_reset(skb);
1103 
1104 	if (sk_filter(sk, skb, 0))
1105 		goto discard_and_relse;
1106 
1107 	skb->dev = NULL;
1108 
1109 	bh_lock_sock_nested(sk);
1110 	ret = 0;
1111 	if (!sock_owned_by_user(sk)) {
1112 #ifdef CONFIG_NET_DMA
1113 		struct tcp_sock *tp = tcp_sk(sk);
1114 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1115 			tp->ucopy.dma_chan = get_softnet_dma();
1116 		if (tp->ucopy.dma_chan)
1117 			ret = tcp_v4_do_rcv(sk, skb);
1118 		else
1119 #endif
1120 		{
1121 			if (!tcp_prequeue(sk, skb))
1122 				ret = tcp_v4_do_rcv(sk, skb);
1123 		}
1124 	} else
1125 		sk_add_backlog(sk, skb);
1126 	bh_unlock_sock(sk);
1127 
1128 	sock_put(sk);
1129 
1130 	return ret;
1131 
1132 no_tcp_socket:
1133 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1134 		goto discard_it;
1135 
1136 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1137 bad_packet:
1138 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1139 	} else {
1140 		tcp_v4_send_reset(skb);
1141 	}
1142 
1143 discard_it:
1144 	/* Discard frame. */
1145 	kfree_skb(skb);
1146 	return 0;
1147 
1148 discard_and_relse:
1149 	sock_put(sk);
1150 	goto discard_it;
1151 
1152 do_time_wait:
1153 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1154 		inet_twsk_put((struct inet_timewait_sock *) sk);
1155 		goto discard_it;
1156 	}
1157 
1158 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1159 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1160 		inet_twsk_put((struct inet_timewait_sock *) sk);
1161 		goto discard_it;
1162 	}
1163 	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1164 					   skb, th)) {
1165 	case TCP_TW_SYN: {
1166 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1167 							skb->nh.iph->daddr,
1168 							ntohs(th->dest),
1169 							inet_iif(skb));
1170 		if (sk2) {
1171 			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
1172 					     &tcp_death_row);
1173 			inet_twsk_put((struct inet_timewait_sock *)sk);
1174 			sk = sk2;
1175 			goto process;
1176 		}
1177 		/* Fall through to ACK */
1178 	}
1179 	case TCP_TW_ACK:
1180 		tcp_v4_timewait_ack(sk, skb);
1181 		break;
1182 	case TCP_TW_RST:
1183 		goto no_tcp_socket;
1184 	case TCP_TW_SUCCESS:;
1185 	}
1186 	goto discard_it;
1187 }
1188 
1189 /* VJ's idea. Save the last timestamp seen from this destination
1190  * and hold it at least for the normal timewait interval, to use for duplicate
1191  * segment detection in subsequent connections before they enter the
1192  * synchronized state.
1193  */
1194 
1195 int tcp_v4_remember_stamp(struct sock *sk)
1196 {
1197 	struct inet_sock *inet = inet_sk(sk);
1198 	struct tcp_sock *tp = tcp_sk(sk);
1199 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1200 	struct inet_peer *peer = NULL;
1201 	int release_it = 0;
1202 
1203 	if (!rt || rt->rt_dst != inet->daddr) {
1204 		peer = inet_getpeer(inet->daddr, 1);
1205 		release_it = 1;
1206 	} else {
1207 		if (!rt->peer)
1208 			rt_bind_peer(rt, 1);
1209 		peer = rt->peer;
1210 	}
1211 
1212 	if (peer) {
1213 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1214 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1215 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1216 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1217 			peer->tcp_ts = tp->rx_opt.ts_recent;
1218 		}
1219 		if (release_it)
1220 			inet_putpeer(peer);
1221 		return 1;
1222 	}
1223 
1224 	return 0;
1225 }
1226 
1227 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1228 {
1229 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1230 
1231 	if (peer) {
1232 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1233 
1234 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1235 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1236 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1237 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1238 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1239 		}
1240 		inet_putpeer(peer);
1241 		return 1;
1242 	}
1243 
1244 	return 0;
1245 }
1246 
1247 struct inet_connection_sock_af_ops ipv4_specific = {
1248 	.queue_xmit	   = ip_queue_xmit,
1249 	.send_check	   = tcp_v4_send_check,
1250 	.rebuild_header	   = inet_sk_rebuild_header,
1251 	.conn_request	   = tcp_v4_conn_request,
1252 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1253 	.remember_stamp	   = tcp_v4_remember_stamp,
1254 	.net_header_len	   = sizeof(struct iphdr),
1255 	.setsockopt	   = ip_setsockopt,
1256 	.getsockopt	   = ip_getsockopt,
1257 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1258 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1259 #ifdef CONFIG_COMPAT
1260 	.compat_setsockopt = compat_ip_setsockopt,
1261 	.compat_getsockopt = compat_ip_getsockopt,
1262 #endif
1263 };
1264 
1265 /* NOTE: A lot of things are set to zero explicitly by the call to
1266  *       sk_alloc(), so they need not be done here.
1267  */
1268 static int tcp_v4_init_sock(struct sock *sk)
1269 {
1270 	struct inet_connection_sock *icsk = inet_csk(sk);
1271 	struct tcp_sock *tp = tcp_sk(sk);
1272 
1273 	skb_queue_head_init(&tp->out_of_order_queue);
1274 	tcp_init_xmit_timers(sk);
1275 	tcp_prequeue_init(tp);
1276 
1277 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1278 	tp->mdev = TCP_TIMEOUT_INIT;
1279 
1280 	/* So many TCP implementations out there (incorrectly) count the
1281 	 * initial SYN frame in their delayed-ACK and congestion control
1282 	 * algorithms that we must have the following bandaid to talk
1283 	 * efficiently to them.  -DaveM
1284 	 */
1285 	tp->snd_cwnd = 2;
1286 
1287 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1288 	 * initialization of these values.
1289 	 */
1290 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
1291 	tp->snd_cwnd_clamp = ~0;
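	/* Conservative initial MSS, refined by tcp_sync_mss() once the
	 * route and its MTU are known.
	 */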
1292 	tp->mss_cache = 536;
1293 
1294 	tp->reordering = sysctl_tcp_reordering;
1295 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1296 
1297 	sk->sk_state = TCP_CLOSE;
1298 
1299 	sk->sk_write_space = sk_stream_write_space;
1300 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1301 
1302 	icsk->icsk_af_ops = &ipv4_specific;
1303 	icsk->icsk_sync_mss = tcp_sync_mss;
1304 
1305 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1306 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1307 
1308 	atomic_inc(&tcp_sockets_allocated);
1309 
1310 	return 0;
1311 }
1312 
1313 int tcp_v4_destroy_sock(struct sock *sk)
1314 {
1315 	struct tcp_sock *tp = tcp_sk(sk);
1316 
1317 	tcp_clear_xmit_timers(sk);
1318 
1319 	tcp_cleanup_congestion_control(sk);
1320 
1321 	/* Clean up the write buffer. */
1322 	sk_stream_writequeue_purge(sk);
1323 
1324 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1325 	__skb_queue_purge(&tp->out_of_order_queue);
1326 
1327 #ifdef CONFIG_NET_DMA
1328 	/* Cleans up our sk_async_wait_queue */
1329 	__skb_queue_purge(&sk->sk_async_wait_queue);
1330 #endif
1331 
1332 	/* Clean the prequeue; it really must be empty */
1333 	__skb_queue_purge(&tp->ucopy.prequeue);
1334 
1335 	/* Clean up a referenced TCP bind bucket. */
1336 	if (inet_csk(sk)->icsk_bind_hash)
1337 		inet_put_port(&tcp_hashinfo, sk);
1338 
1339 	/*
1340 	 * If sendmsg cached page exists, toss it.
1341 	 */
1342 	if (sk->sk_sndmsg_page) {
1343 		__free_page(sk->sk_sndmsg_page);
1344 		sk->sk_sndmsg_page = NULL;
1345 	}
1346 
1347 	atomic_dec(&tcp_sockets_allocated);
1348 
1349 	return 0;
1350 }
1351 
1352 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1353 
1354 #ifdef CONFIG_PROC_FS
1355 /* Proc filesystem TCP sock list dumping. */
1356 
1357 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1358 {
1359 	return hlist_empty(head) ? NULL :
1360 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1361 }
1362 
1363 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1364 {
1365 	return tw->tw_node.next ?
1366 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1367 }
1368 
1369 static void *listening_get_next(struct seq_file *seq, void *cur)
1370 {
1371 	struct inet_connection_sock *icsk;
1372 	struct hlist_node *node;
1373 	struct sock *sk = cur;
1374 	struct tcp_iter_state* st = seq->private;
1375 
1376 	if (!sk) {
1377 		st->bucket = 0;
1378 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1379 		goto get_sk;
1380 	}
1381 
1382 	++st->num;
1383 
1384 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1385 		struct request_sock *req = cur;
1386 
1387 	       	icsk = inet_csk(st->syn_wait_sk);
1388 		req = req->dl_next;
1389 		while (1) {
1390 			while (req) {
1391 				if (req->rsk_ops->family == st->family) {
1392 					cur = req;
1393 					goto out;
1394 				}
1395 				req = req->dl_next;
1396 			}
1397 			if (++st->sbucket >= TCP_SYNQ_HSIZE)
1398 				break;
1399 get_req:
1400 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1401 		}
1402 		sk	  = sk_next(st->syn_wait_sk);
1403 		st->state = TCP_SEQ_STATE_LISTENING;
1404 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1405 	} else {
1406 	       	icsk = inet_csk(sk);
1407 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1408 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1409 			goto start_req;
1410 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1411 		sk = sk_next(sk);
1412 	}
1413 get_sk:
1414 	sk_for_each_from(sk, node) {
1415 		if (sk->sk_family == st->family) {
1416 			cur = sk;
1417 			goto out;
1418 		}
1419 	       	icsk = inet_csk(sk);
1420 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1421 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1422 start_req:
1423 			st->uid		= sock_i_uid(sk);
1424 			st->syn_wait_sk = sk;
1425 			st->state	= TCP_SEQ_STATE_OPENREQ;
1426 			st->sbucket	= 0;
1427 			goto get_req;
1428 		}
1429 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1430 	}
1431 	if (++st->bucket < INET_LHTABLE_SIZE) {
1432 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
1433 		goto get_sk;
1434 	}
1435 	cur = NULL;
1436 out:
1437 	return cur;
1438 }
1439 
1440 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1441 {
1442 	void *rc = listening_get_next(seq, NULL);
1443 
1444 	while (rc && *pos) {
1445 		rc = listening_get_next(seq, rc);
1446 		--*pos;
1447 	}
1448 	return rc;
1449 }
1450 
1451 static void *established_get_first(struct seq_file *seq)
1452 {
1453 	struct tcp_iter_state* st = seq->private;
1454 	void *rc = NULL;
1455 
1456 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
1457 		struct sock *sk;
1458 		struct hlist_node *node;
1459 		struct inet_timewait_sock *tw;
1460 
1461 		/* We can reschedule _before_ having picked the target: */
1462 		cond_resched_softirq();
1463 
1464 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
1465 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1466 			if (sk->sk_family != st->family) {
1467 				continue;
1468 			}
1469 			rc = sk;
1470 			goto out;
1471 		}
1472 		st->state = TCP_SEQ_STATE_TIME_WAIT;
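		/* TIME_WAIT sockets live in the upper half of the ehash table,
		 * offset by ehash_size from the established chains.
		 */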
1473 		inet_twsk_for_each(tw, node,
1474 				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
1475 			if (tw->tw_family != st->family) {
1476 				continue;
1477 			}
1478 			rc = tw;
1479 			goto out;
1480 		}
1481 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
1482 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1483 	}
1484 out:
1485 	return rc;
1486 }
1487 
1488 static void *established_get_next(struct seq_file *seq, void *cur)
1489 {
1490 	struct sock *sk = cur;
1491 	struct inet_timewait_sock *tw;
1492 	struct hlist_node *node;
1493 	struct tcp_iter_state* st = seq->private;
1494 
1495 	++st->num;
1496 
1497 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
1498 		tw = cur;
1499 		tw = tw_next(tw);
1500 get_tw:
1501 		while (tw && tw->tw_family != st->family) {
1502 			tw = tw_next(tw);
1503 		}
1504 		if (tw) {
1505 			cur = tw;
1506 			goto out;
1507 		}
1508 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
1509 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1510 
1511 		/* We can reschedule between buckets: */
1512 		cond_resched_softirq();
1513 
1514 		if (++st->bucket < tcp_hashinfo.ehash_size) {
1515 			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
1516 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
1517 		} else {
1518 			cur = NULL;
1519 			goto out;
1520 		}
1521 	} else
1522 		sk = sk_next(sk);
1523 
1524 	sk_for_each_from(sk, node) {
1525 		if (sk->sk_family == st->family)
1526 			goto found;
1527 	}
1528 
1529 	st->state = TCP_SEQ_STATE_TIME_WAIT;
1530 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
1531 	goto get_tw;
1532 found:
1533 	cur = sk;
1534 out:
1535 	return cur;
1536 }
1537 
1538 static void *established_get_idx(struct seq_file *seq, loff_t pos)
1539 {
1540 	void *rc = established_get_first(seq);
1541 
1542 	while (rc && pos) {
1543 		rc = established_get_next(seq, rc);
1544 		--pos;
1545 	}
1546 	return rc;
1547 }
1548 
1549 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
1550 {
1551 	void *rc;
1552 	struct tcp_iter_state* st = seq->private;
1553 
1554 	inet_listen_lock(&tcp_hashinfo);
1555 	st->state = TCP_SEQ_STATE_LISTENING;
1556 	rc	  = listening_get_idx(seq, &pos);
1557 
1558 	if (!rc) {
1559 		inet_listen_unlock(&tcp_hashinfo);
1560 		local_bh_disable();
1561 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1562 		rc	  = established_get_idx(seq, pos);
1563 	}
1564 
1565 	return rc;
1566 }
1567 
1568 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
1569 {
1570 	struct tcp_iter_state* st = seq->private;
1571 	st->state = TCP_SEQ_STATE_LISTENING;
1572 	st->num = 0;
1573 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1574 }
1575 
1576 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1577 {
1578 	void *rc = NULL;
1579 	struct tcp_iter_state* st;
1580 
1581 	if (v == SEQ_START_TOKEN) {
1582 		rc = tcp_get_idx(seq, 0);
1583 		goto out;
1584 	}
1585 	st = seq->private;
1586 
1587 	switch (st->state) {
1588 	case TCP_SEQ_STATE_OPENREQ:
1589 	case TCP_SEQ_STATE_LISTENING:
1590 		rc = listening_get_next(seq, v);
1591 		if (!rc) {
1592 			inet_listen_unlock(&tcp_hashinfo);
1593 			local_bh_disable();
1594 			st->state = TCP_SEQ_STATE_ESTABLISHED;
1595 			rc	  = established_get_first(seq);
1596 		}
1597 		break;
1598 	case TCP_SEQ_STATE_ESTABLISHED:
1599 	case TCP_SEQ_STATE_TIME_WAIT:
1600 		rc = established_get_next(seq, v);
1601 		break;
1602 	}
1603 out:
1604 	++*pos;
1605 	return rc;
1606 }
1607 
1608 static void tcp_seq_stop(struct seq_file *seq, void *v)
1609 {
1610 	struct tcp_iter_state* st = seq->private;
1611 
1612 	switch (st->state) {
1613 	case TCP_SEQ_STATE_OPENREQ:
1614 		if (v) {
1615 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
1616 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1617 		}
1618 	case TCP_SEQ_STATE_LISTENING:
1619 		if (v != SEQ_START_TOKEN)
1620 			inet_listen_unlock(&tcp_hashinfo);
1621 		break;
1622 	case TCP_SEQ_STATE_TIME_WAIT:
1623 	case TCP_SEQ_STATE_ESTABLISHED:
1624 		if (v)
1625 			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
1626 		local_bh_enable();
1627 		break;
1628 	}
1629 }
1630 
1631 static int tcp_seq_open(struct inode *inode, struct file *file)
1632 {
1633 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1634 	struct seq_file *seq;
1635 	struct tcp_iter_state *s;
1636 	int rc;
1637 
1638 	if (unlikely(afinfo == NULL))
1639 		return -EINVAL;
1640 
1641 	s = kzalloc(sizeof(*s), GFP_KERNEL);
1642 	if (!s)
1643 		return -ENOMEM;
1644 	s->family		= afinfo->family;
1645 	s->seq_ops.start	= tcp_seq_start;
1646 	s->seq_ops.next		= tcp_seq_next;
1647 	s->seq_ops.show		= afinfo->seq_show;
1648 	s->seq_ops.stop		= tcp_seq_stop;
1649 
1650 	rc = seq_open(file, &s->seq_ops);
1651 	if (rc)
1652 		goto out_kfree;
1653 	seq	     = file->private_data;
1654 	seq->private = s;
1655 out:
1656 	return rc;
1657 out_kfree:
1658 	kfree(s);
1659 	goto out;
1660 }
1661 
1662 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
1663 {
1664 	int rc = 0;
1665 	struct proc_dir_entry *p;
1666 
1667 	if (!afinfo)
1668 		return -EINVAL;
1669 	afinfo->seq_fops->owner		= afinfo->owner;
1670 	afinfo->seq_fops->open		= tcp_seq_open;
1671 	afinfo->seq_fops->read		= seq_read;
1672 	afinfo->seq_fops->llseek	= seq_lseek;
1673 	afinfo->seq_fops->release	= seq_release_private;
1674 
1675 	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
1676 	if (p)
1677 		p->data = afinfo;
1678 	else
1679 		rc = -ENOMEM;
1680 	return rc;
1681 }
1682 
1683 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
1684 {
1685 	if (!afinfo)
1686 		return;
1687 	proc_net_remove(afinfo->name);
1688 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
1689 }
1690 
1691 static void get_openreq4(struct sock *sk, struct request_sock *req,
1692 			 char *tmpbuf, int i, int uid)
1693 {
1694 	const struct inet_request_sock *ireq = inet_rsk(req);
1695 	int ttd = req->expires - jiffies;
1696 
1697 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1698 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
1699 		i,
1700 		ireq->loc_addr,
1701 		ntohs(inet_sk(sk)->sport),
1702 		ireq->rmt_addr,
1703 		ntohs(ireq->rmt_port),
1704 		TCP_SYN_RECV,
1705 		0, 0, /* could print option size, but that is af dependent. */
1706 		1,    /* timers active (only the expire timer) */
1707 		jiffies_to_clock_t(ttd),
1708 		req->retrans,
1709 		uid,
1710 		0,  /* non standard timer */
1711 		0, /* open_requests have no inode */
1712 		atomic_read(&sk->sk_refcnt),
1713 		req);
1714 }
1715 
1716 static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
1717 {
1718 	int timer_active;
1719 	unsigned long timer_expires;
1720 	struct tcp_sock *tp = tcp_sk(sp);
1721 	const struct inet_connection_sock *icsk = inet_csk(sp);
1722 	struct inet_sock *inet = inet_sk(sp);
1723 	unsigned int dest = inet->daddr;
1724 	unsigned int src = inet->rcv_saddr;
1725 	__u16 destp = ntohs(inet->dport);
1726 	__u16 srcp = ntohs(inet->sport);
1727 
1728 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1729 		timer_active	= 1;
1730 		timer_expires	= icsk->icsk_timeout;
1731 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1732 		timer_active	= 4;
1733 		timer_expires	= icsk->icsk_timeout;
1734 	} else if (timer_pending(&sp->sk_timer)) {
1735 		timer_active	= 2;
1736 		timer_expires	= sp->sk_timer.expires;
1737 	} else {
1738 		timer_active	= 0;
1739 		timer_expires = jiffies;
1740 	}
1741 
1742 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
1743 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
1744 		i, src, srcp, dest, destp, sp->sk_state,
1745 		tp->write_seq - tp->snd_una,
1746 		(sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1747 		timer_active,
1748 		jiffies_to_clock_t(timer_expires - jiffies),
1749 		icsk->icsk_retransmits,
1750 		sock_i_uid(sp),
1751 		icsk->icsk_probes_out,
1752 		sock_i_ino(sp),
1753 		atomic_read(&sp->sk_refcnt), sp,
1754 		icsk->icsk_rto,
1755 		icsk->icsk_ack.ato,
1756 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1757 		tp->snd_cwnd,
1758 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
1759 }
1760 
1761 static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
1762 {
1763 	unsigned int dest, src;
1764 	__u16 destp, srcp;
1765 	int ttd = tw->tw_ttd - jiffies;
1766 
1767 	if (ttd < 0)
1768 		ttd = 0;
1769 
1770 	dest  = tw->tw_daddr;
1771 	src   = tw->tw_rcv_saddr;
1772 	destp = ntohs(tw->tw_dport);
1773 	srcp  = ntohs(tw->tw_sport);
1774 
1775 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1776 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
1777 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
1778 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1779 		atomic_read(&tw->tw_refcnt), tw);
1780 }
1781 
1782 #define TMPSZ 150
1783 
1784 static int tcp4_seq_show(struct seq_file *seq, void *v)
1785 {
1786 	struct tcp_iter_state* st;
1787 	char tmpbuf[TMPSZ + 1];
1788 
1789 	if (v == SEQ_START_TOKEN) {
1790 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
1791 			   "  sl  local_address rem_address   st tx_queue "
1792 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
1793 			   "inode");
1794 		goto out;
1795 	}
1796 	st = seq->private;
1797 
1798 	switch (st->state) {
1799 	case TCP_SEQ_STATE_LISTENING:
1800 	case TCP_SEQ_STATE_ESTABLISHED:
1801 		get_tcp4_sock(v, tmpbuf, st->num);
1802 		break;
1803 	case TCP_SEQ_STATE_OPENREQ:
1804 		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
1805 		break;
1806 	case TCP_SEQ_STATE_TIME_WAIT:
1807 		get_timewait4_sock(v, tmpbuf, st->num);
1808 		break;
1809 	}
1810 	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
1811 out:
1812 	return 0;
1813 }
1814 
1815 static struct file_operations tcp4_seq_fops;
1816 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1817 	.owner		= THIS_MODULE,
1818 	.name		= "tcp",
1819 	.family		= AF_INET,
1820 	.seq_show	= tcp4_seq_show,
1821 	.seq_fops	= &tcp4_seq_fops,
1822 };
1823 
1824 int __init tcp4_proc_init(void)
1825 {
1826 	return tcp_proc_register(&tcp4_seq_afinfo);
1827 }
1828 
1829 void tcp4_proc_exit(void)
1830 {
1831 	tcp_proc_unregister(&tcp4_seq_afinfo);
1832 }
1833 #endif /* CONFIG_PROC_FS */
1834 
1835 struct proto tcp_prot = {
1836 	.name			= "TCP",
1837 	.owner			= THIS_MODULE,
1838 	.close			= tcp_close,
1839 	.connect		= tcp_v4_connect,
1840 	.disconnect		= tcp_disconnect,
1841 	.accept			= inet_csk_accept,
1842 	.ioctl			= tcp_ioctl,
1843 	.init			= tcp_v4_init_sock,
1844 	.destroy		= tcp_v4_destroy_sock,
1845 	.shutdown		= tcp_shutdown,
1846 	.setsockopt		= tcp_setsockopt,
1847 	.getsockopt		= tcp_getsockopt,
1848 	.sendmsg		= tcp_sendmsg,
1849 	.recvmsg		= tcp_recvmsg,
1850 	.backlog_rcv		= tcp_v4_do_rcv,
1851 	.hash			= tcp_v4_hash,
1852 	.unhash			= tcp_unhash,
1853 	.get_port		= tcp_v4_get_port,
1854 	.enter_memory_pressure	= tcp_enter_memory_pressure,
1855 	.sockets_allocated	= &tcp_sockets_allocated,
1856 	.orphan_count		= &tcp_orphan_count,
1857 	.memory_allocated	= &tcp_memory_allocated,
1858 	.memory_pressure	= &tcp_memory_pressure,
1859 	.sysctl_mem		= sysctl_tcp_mem,
1860 	.sysctl_wmem		= sysctl_tcp_wmem,
1861 	.sysctl_rmem		= sysctl_tcp_rmem,
1862 	.max_header		= MAX_TCP_HEADER,
1863 	.obj_size		= sizeof(struct tcp_sock),
1864 	.twsk_prot		= &tcp_timewait_sock_ops,
1865 	.rsk_prot		= &tcp_request_sock_ops,
1866 #ifdef CONFIG_COMPAT
1867 	.compat_setsockopt	= compat_tcp_setsockopt,
1868 	.compat_getsockopt	= compat_tcp_getsockopt,
1869 #endif
1870 };
1871 
1872 void __init tcp_v4_init(struct net_proto_family *ops)
1873 {
1874 	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0)
1875 		panic("Failed to create the TCP control socket.\n");
1876 }
1877 
1878 EXPORT_SYMBOL(ipv4_specific);
1879 EXPORT_SYMBOL(tcp_hashinfo);
1880 EXPORT_SYMBOL(tcp_prot);
1881 EXPORT_SYMBOL(tcp_unhash);
1882 EXPORT_SYMBOL(tcp_v4_conn_request);
1883 EXPORT_SYMBOL(tcp_v4_connect);
1884 EXPORT_SYMBOL(tcp_v4_do_rcv);
1885 EXPORT_SYMBOL(tcp_v4_remember_stamp);
1886 EXPORT_SYMBOL(tcp_v4_send_check);
1887 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1888 
1889 #ifdef CONFIG_PROC_FS
1890 EXPORT_SYMBOL(tcp_proc_register);
1891 EXPORT_SYMBOL(tcp_proc_unregister);
1892 #endif
1893 EXPORT_SYMBOL(sysctl_local_port_range);
1894 EXPORT_SYMBOL(sysctl_tcp_low_latency);
1895 
1896