xref: /linux/net/ipv4/tcp_ipv4.c (revision 14b42963f64b98ab61fa9723c03d71aa5ef4f862)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9  *
10  *		IPv4 specific functions
11  *
12  *
13  *		code split from:
14  *		linux/ipv4/tcp.c
15  *		linux/ipv4/tcp_input.c
16  *		linux/ipv4/tcp_output.c
17  *
18  *		See tcp.c for author information
19  *
20  *	This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  */
25 
26 /*
27  * Changes:
28  *		David S. Miller	:	New socket lookup architecture.
29  *					This code is dedicated to John Dyson.
30  *		David S. Miller :	Change semantics of established hash,
31  *					half is devoted to TIME_WAIT sockets
32  *					and the rest go in the other half.
33  *		Andi Kleen :		Add support for syncookies and fixed
34  *					some bugs: ip options weren't passed to
35  *					the TCP layer, missed a check for an
36  *					ACK bit.
37  *		Andi Kleen :		Implemented fast path mtu discovery.
38  *	     				Fixed many serious bugs in the
39  *					request_sock handling and moved
40  *					most of it into the af independent code.
41  *					Added tail drop and some other bugfixes.
42  *					Added new listen semantics.
43  *		Mike McLagan	:	Routing by source
44  *	Juan Jose Ciarlante:		ip_dynaddr bits
45  *		Andi Kleen:		various fixes.
46  *	Vitaly E. Lavrov	:	Transparent proxy revived after a year-long
47  *					coma.
48  *	Andi Kleen		:	Fix new listen.
49  *	Andi Kleen		:	Fix accept error reporting.
50  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
51  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
52  *					a single port at the same time.
53  */
54 
55 
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 
65 #include <net/icmp.h>
66 #include <net/inet_hashtables.h>
67 #include <net/tcp.h>
68 #include <net/transp_v6.h>
69 #include <net/ipv6.h>
70 #include <net/inet_common.h>
71 #include <net/timewait_sock.h>
72 #include <net/xfrm.h>
73 #include <net/netdma.h>
74 
75 #include <linux/inet.h>
76 #include <linux/ipv6.h>
77 #include <linux/stddef.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 
81 int sysctl_tcp_tw_reuse;
82 int sysctl_tcp_low_latency;
83 
84 /* Check TCP sequence numbers in ICMP packets. */
85 #define ICMP_MIN_LENGTH 8
86 
87 /* Socket used for sending RSTs */
88 static struct socket *tcp_socket;
89 
90 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
91 
92 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
93 	.lhash_lock	= __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
94 	.lhash_users	= ATOMIC_INIT(0),
95 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
96 };
97 
98 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
99 {
100 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
101 				 inet_csk_bind_conflict);
102 }
103 
104 static void tcp_v4_hash(struct sock *sk)
105 {
106 	inet_hash(&tcp_hashinfo, sk);
107 }
108 
109 void tcp_unhash(struct sock *sk)
110 {
111 	inet_unhash(&tcp_hashinfo, sk);
112 }
113 
114 static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
115 {
116 	return secure_tcp_sequence_number(skb->nh.iph->daddr,
117 					  skb->nh.iph->saddr,
118 					  skb->h.th->dest,
119 					  skb->h.th->source);
120 }
121 
122 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
123 {
124 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
125 	struct tcp_sock *tp = tcp_sk(sk);
126 
127 	/* With PAWS, it is safe from the viewpoint
128 	   of data integrity. Even without PAWS it is safe provided sequence
129 	   spaces do not overlap, i.e. at data rates <= 80Mbit/sec.
130 
131 	   Actually, the idea is close to VJ's, only the timestamp cache is
132 	   held not per host but per port pair, and the TW bucket is used as
133 	   the state holder.
134 
135 	   If the TW bucket has already been destroyed, we fall back to VJ's
136 	   scheme and use the initial timestamp retrieved from the peer table.
137 	 */
138 	if (tcptw->tw_ts_recent_stamp &&
139 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
140 			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
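		/* Start the new connection's sequence space just past the old
		 * one's send window (64K plus a little), so stray segments from
		 * the previous incarnation cannot be mistaken for new data.
		 */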
141 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
142 		if (tp->write_seq == 0)
143 			tp->write_seq = 1;
144 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
145 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
146 		sock_hold(sktw);
147 		return 1;
148 	}
149 
150 	return 0;
151 }
152 
153 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
154 
155 /* This will initiate an outgoing connection. */
156 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
157 {
158 	struct inet_sock *inet = inet_sk(sk);
159 	struct tcp_sock *tp = tcp_sk(sk);
160 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
161 	struct rtable *rt;
162 	u32 daddr, nexthop;
163 	int tmp;
164 	int err;
165 
166 	if (addr_len < sizeof(struct sockaddr_in))
167 		return -EINVAL;
168 
169 	if (usin->sin_family != AF_INET)
170 		return -EAFNOSUPPORT;
171 
172 	nexthop = daddr = usin->sin_addr.s_addr;
173 	if (inet->opt && inet->opt->srr) {
174 		if (!daddr)
175 			return -EINVAL;
176 		nexthop = inet->opt->faddr;
177 	}
178 
179 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
180 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
181 			       IPPROTO_TCP,
182 			       inet->sport, usin->sin_port, sk);
183 	if (tmp < 0)
184 		return tmp;
185 
186 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
187 		ip_rt_put(rt);
188 		return -ENETUNREACH;
189 	}
190 
191 	if (!inet->opt || !inet->opt->srr)
192 		daddr = rt->rt_dst;
193 
194 	if (!inet->saddr)
195 		inet->saddr = rt->rt_src;
196 	inet->rcv_saddr = inet->saddr;
197 
198 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
199 		/* Reset inherited state */
200 		tp->rx_opt.ts_recent	   = 0;
201 		tp->rx_opt.ts_recent_stamp = 0;
202 		tp->write_seq		   = 0;
203 	}
204 
205 	if (tcp_death_row.sysctl_tw_recycle &&
206 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
207 		struct inet_peer *peer = rt_get_peer(rt);
208 
209 		/* VJ's idea. We save last timestamp seen from
210 		 * the destination in peer table, when entering state TIME-WAIT
211 		 * and initialize rx_opt.ts_recent from it, when trying new connection.
212 		 */
213 
214 		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
215 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
216 			tp->rx_opt.ts_recent = peer->tcp_ts;
217 		}
218 	}
219 
220 	inet->dport = usin->sin_port;
221 	inet->daddr = daddr;
222 
223 	inet_csk(sk)->icsk_ext_hdr_len = 0;
224 	if (inet->opt)
225 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
226 
227 	tp->rx_opt.mss_clamp = 536;
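	/* 536 is the protocol default MSS: the 576-byte minimum reassembly
	 * buffer of RFC 1122 minus 40 bytes of IP and TCP headers.
	 */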
228 
229 	/* Socket identity is still unknown (sport may be zero).
230 	 * However we set the state to SYN-SENT and, without releasing the
231 	 * socket lock, select a source port, enter ourselves into the hash
232 	 * tables and complete initialization after this.
233 	 */
234 	tcp_set_state(sk, TCP_SYN_SENT);
235 	err = inet_hash_connect(&tcp_death_row, sk);
236 	if (err)
237 		goto failure;
238 
239 	err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
240 	if (err)
241 		goto failure;
242 
243 	/* OK, now commit destination to socket.  */
244 	sk->sk_gso_type = SKB_GSO_TCPV4;
245 	sk_setup_caps(sk, &rt->u.dst);
246 
247 	if (!tp->write_seq)
248 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
249 							   inet->daddr,
250 							   inet->sport,
251 							   usin->sin_port);
252 
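	/* Seed the per-socket IP identification counter from the initial
	 * sequence number and the current jiffies value.
	 */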
253 	inet->id = tp->write_seq ^ jiffies;
254 
255 	err = tcp_connect(sk);
256 	rt = NULL;
257 	if (err)
258 		goto failure;
259 
260 	return 0;
261 
262 failure:
263 	/* This unhashes the socket and releases the local port, if necessary. */
264 	tcp_set_state(sk, TCP_CLOSE);
265 	ip_rt_put(rt);
266 	sk->sk_route_caps = 0;
267 	inet->dport = 0;
268 	return err;
269 }
270 
271 /*
272  * This routine does path mtu discovery as defined in RFC1191.
273  */
274 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
275 {
276 	struct dst_entry *dst;
277 	struct inet_sock *inet = inet_sk(sk);
278 
279 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
280 	 * sent out by Linux are always < 576 bytes, so they should go through
281 	 * unfragmented).
282 	 */
283 	if (sk->sk_state == TCP_LISTEN)
284 		return;
285 
286 	/* We don't check in the dst entry whether pmtu discovery is forbidden
287 	 * on this route. We just assume that no packet-too-big packets
288 	 * are sent back when pmtu discovery is not active.
289 	 * There is a small race when the user changes this flag in the
290 	 * route, but I think that's acceptable.
291 	 */
292 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
293 		return;
294 
295 	dst->ops->update_pmtu(dst, mtu);
296 
297 	/* Something is about to go wrong... Remember the soft error
298 	 * in case this connection is not able to recover.
299 	 */
300 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
301 		sk->sk_err_soft = EMSGSIZE;
302 
303 	mtu = dst_mtu(dst);
304 
305 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
306 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
307 		tcp_sync_mss(sk, mtu);
308 
309 		/* Resend the TCP packet because it's
310 		 * clear that the old packet has been
311 		 * dropped. This is the new "fast" path mtu
312 		 * discovery.
313 		 */
314 		tcp_simple_retransmit(sk);
315 	} /* else let the usual retransmit timer handle it */
316 }
317 
318 /*
319  * This routine is called by the ICMP module when it gets some
320  * sort of error condition.  If err < 0 then the socket should
321  * be closed and the error returned to the user.  If err > 0
322  * it's just the icmp type << 8 | icmp code.  After adjustment
323  * the header points to the first 8 bytes of the tcp header.  We need
324  * to find the appropriate port.
325  *
326  * The locking strategy used here is very "optimistic". When
327  * someone else is accessing the socket, the ICMP is just dropped
328  * and for some paths there is no check at all.
329  * A more general error queue to queue errors for later handling
330  * is probably better.
331  *
332  */
333 
334 void tcp_v4_err(struct sk_buff *skb, u32 info)
335 {
336 	struct iphdr *iph = (struct iphdr *)skb->data;
337 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
338 	struct tcp_sock *tp;
339 	struct inet_sock *inet;
340 	int type = skb->h.icmph->type;
341 	int code = skb->h.icmph->code;
342 	struct sock *sk;
343 	__u32 seq;
344 	int err;
345 
346 	if (skb->len < (iph->ihl << 2) + 8) {
347 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
348 		return;
349 	}
350 
351 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
352 			 th->source, inet_iif(skb));
353 	if (!sk) {
354 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
355 		return;
356 	}
357 	if (sk->sk_state == TCP_TIME_WAIT) {
358 		inet_twsk_put((struct inet_timewait_sock *)sk);
359 		return;
360 	}
361 
362 	bh_lock_sock(sk);
363 	/* If too many ICMPs get dropped on busy
364 	 * servers this needs to be solved differently.
365 	 */
366 	if (sock_owned_by_user(sk))
367 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
368 
369 	if (sk->sk_state == TCP_CLOSE)
370 		goto out;
371 
372 	tp = tcp_sk(sk);
373 	seq = ntohl(th->seq);
374 	if (sk->sk_state != TCP_LISTEN &&
375 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
376 		NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
377 		goto out;
378 	}
379 
380 	switch (type) {
381 	case ICMP_SOURCE_QUENCH:
382 		/* Just silently ignore these. */
383 		goto out;
384 	case ICMP_PARAMETERPROB:
385 		err = EPROTO;
386 		break;
387 	case ICMP_DEST_UNREACH:
388 		if (code > NR_ICMP_UNREACH)
389 			goto out;
390 
391 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
392 			if (!sock_owned_by_user(sk))
393 				do_pmtu_discovery(sk, iph, info);
394 			goto out;
395 		}
396 
397 		err = icmp_err_convert[code].errno;
398 		break;
399 	case ICMP_TIME_EXCEEDED:
400 		err = EHOSTUNREACH;
401 		break;
402 	default:
403 		goto out;
404 	}
405 
406 	switch (sk->sk_state) {
407 		struct request_sock *req, **prev;
408 	case TCP_LISTEN:
409 		if (sock_owned_by_user(sk))
410 			goto out;
411 
412 		req = inet_csk_search_req(sk, &prev, th->dest,
413 					  iph->daddr, iph->saddr);
414 		if (!req)
415 			goto out;
416 
417 		/* ICMPs are not backlogged, hence we cannot get
418 		   an established socket here.
419 		 */
420 		BUG_TRAP(!req->sk);
421 
422 		if (seq != tcp_rsk(req)->snt_isn) {
423 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
424 			goto out;
425 		}
426 
427 		/*
428 		 * Still in SYN_RECV, just remove it silently.
429 		 * There is no good way to pass the error to the newly
430 		 * created socket, and POSIX does not want network
431 		 * errors returned from accept().
432 		 */
433 		inet_csk_reqsk_queue_drop(sk, req, prev);
434 		goto out;
435 
436 	case TCP_SYN_SENT:
437 	case TCP_SYN_RECV:  /* Cannot happen.
438 			       It can, e.g., if SYNs crossed.
439 			     */
440 		if (!sock_owned_by_user(sk)) {
441 			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
442 			sk->sk_err = err;
443 
444 			sk->sk_error_report(sk);
445 
446 			tcp_done(sk);
447 		} else {
448 			sk->sk_err_soft = err;
449 		}
450 		goto out;
451 	}
452 
453 	/* If we've already connected we will keep trying
454 	 * until we time out, or the user gives up.
455 	 *
456 	 * RFC 1122 4.2.3.9 allows treating as hard errors
457 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
458 	 * but it is obsoleted by pmtu discovery).
459 	 *
460 	 * Note that in the modern internet, where routing is unreliable
461 	 * and broken firewalls sit in every dark corner sending random
462 	 * errors ordered by their masters, even these two messages finally
463 	 * lose their original sense (even Linux sends invalid PORT_UNREACHs).
464 	 *
465 	 * Now we are in compliance with the RFCs.
466 	 *							--ANK (980905)
467 	 */
468 
469 	inet = inet_sk(sk);
470 	if (!sock_owned_by_user(sk) && inet->recverr) {
471 		sk->sk_err = err;
472 		sk->sk_error_report(sk);
473 	} else	{ /* Only an error on timeout */
474 		sk->sk_err_soft = err;
475 	}
476 
477 out:
478 	bh_unlock_sock(sk);
479 	sock_put(sk);
480 }
481 
482 /* This routine computes an IPv4 TCP checksum. */
483 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
484 {
485 	struct inet_sock *inet = inet_sk(sk);
486 	struct tcphdr *th = skb->h.th;
487 
488 	if (skb->ip_summed == CHECKSUM_HW) {
489 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
490 		skb->csum = offsetof(struct tcphdr, check);
491 	} else {
492 		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
493 					 csum_partial((char *)th,
494 						      th->doff << 2,
495 						      skb->csum));
496 	}
497 }
498 
499 int tcp_v4_gso_send_check(struct sk_buff *skb)
500 {
501 	struct iphdr *iph;
502 	struct tcphdr *th;
503 
504 	if (!pskb_may_pull(skb, sizeof(*th)))
505 		return -EINVAL;
506 
507 	iph = skb->nh.iph;
508 	th = skb->h.th;
509 
510 	th->check = 0;
511 	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
512 	skb->csum = offsetof(struct tcphdr, check);
513 	skb->ip_summed = CHECKSUM_HW;
514 	return 0;
515 }
516 
517 /*
518  *	This routine will send an RST to the other tcp.
519  *
520  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
521  *		      for the reset?
522  *	Answer: if a packet caused an RST, it is not for a socket
523  *		existing in our system; if it is matched to a socket,
524  *		it is just a duplicate segment or a bug in the other side's TCP.
525  *		So we build the reply based only on the parameters
526  *		that arrived with the segment.
527  *	Exception: precedence violation. We do not implement it in any case.
528  */
529 
530 static void tcp_v4_send_reset(struct sk_buff *skb)
531 {
532 	struct tcphdr *th = skb->h.th;
533 	struct tcphdr rth;
534 	struct ip_reply_arg arg;
535 
536 	/* Never send a reset in response to a reset. */
537 	if (th->rst)
538 		return;
539 
540 	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
541 		return;
542 
543 	/* Swap the send and the receive. */
544 	memset(&rth, 0, sizeof(struct tcphdr));
545 	rth.dest   = th->source;
546 	rth.source = th->dest;
547 	rth.doff   = sizeof(struct tcphdr) / 4;
548 	rth.rst    = 1;
549 
550 	if (th->ack) {
551 		rth.seq = th->ack_seq;
552 	} else {
553 		rth.ack = 1;
554 		rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
555 				    skb->len - (th->doff << 2));
556 	}
557 
558 	memset(&arg, 0, sizeof arg);
559 	arg.iov[0].iov_base = (unsigned char *)&rth;
560 	arg.iov[0].iov_len  = sizeof rth;
561 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
562 				      skb->nh.iph->saddr, /*XXX*/
563 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
564 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
565 
566 	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
567 
568 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
569 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
570 }
571 
572 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
573    outside socket context, is certainly ugly. What can I do?
574  */
575 
576 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
577 			    u32 win, u32 ts)
578 {
579 	struct tcphdr *th = skb->h.th;
580 	struct {
581 		struct tcphdr th;
582 		u32 tsopt[3];
583 	} rep;
584 	struct ip_reply_arg arg;
585 
586 	memset(&rep.th, 0, sizeof(struct tcphdr));
587 	memset(&arg, 0, sizeof arg);
588 
589 	arg.iov[0].iov_base = (unsigned char *)&rep;
590 	arg.iov[0].iov_len  = sizeof(rep.th);
591 	if (ts) {
592 		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
593 				     (TCPOPT_TIMESTAMP << 8) |
594 				     TCPOLEN_TIMESTAMP);
595 		rep.tsopt[1] = htonl(tcp_time_stamp);
596 		rep.tsopt[2] = htonl(ts);
597 		arg.iov[0].iov_len = sizeof(rep);
598 	}
599 
600 	/* Swap the send and the receive. */
601 	rep.th.dest    = th->source;
602 	rep.th.source  = th->dest;
603 	rep.th.doff    = arg.iov[0].iov_len / 4;
604 	rep.th.seq     = htonl(seq);
605 	rep.th.ack_seq = htonl(ack);
606 	rep.th.ack     = 1;
607 	rep.th.window  = htons(win);
608 
609 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
610 				      skb->nh.iph->saddr, /*XXX*/
611 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
612 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
613 
614 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
615 
616 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
617 }
618 
619 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
620 {
621 	struct inet_timewait_sock *tw = inet_twsk(sk);
622 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
623 
624 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
625 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
626 
627 	inet_twsk_put(tw);
628 }
629 
630 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
631 {
632 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
633 			req->ts_recent);
634 }
635 
636 /*
637  *	Send a SYN-ACK after having received an ACK.
638  *	This still operates on a request_sock only, not on a big
639  *	socket.
640  */
641 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
642 			      struct dst_entry *dst)
643 {
644 	const struct inet_request_sock *ireq = inet_rsk(req);
645 	int err = -1;
646 	struct sk_buff * skb;
647 
648 	/* First, grab a route. */
649 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
650 		goto out;
651 
652 	skb = tcp_make_synack(sk, dst, req);
653 
654 	if (skb) {
655 		struct tcphdr *th = skb->h.th;
656 
657 		th->check = tcp_v4_check(th, skb->len,
658 					 ireq->loc_addr,
659 					 ireq->rmt_addr,
660 					 csum_partial((char *)th, skb->len,
661 						      skb->csum));
662 
663 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
664 					    ireq->rmt_addr,
665 					    ireq->opt);
666 		if (err == NET_XMIT_CN)
667 			err = 0;
668 	}
669 
670 out:
671 	dst_release(dst);
672 	return err;
673 }
674 
675 /*
676  *	IPv4 request_sock destructor.
677  */
678 static void tcp_v4_reqsk_destructor(struct request_sock *req)
679 {
680 	kfree(inet_rsk(req)->opt);
681 }
682 
683 #ifdef CONFIG_SYN_COOKIES
684 static void syn_flood_warning(struct sk_buff *skb)
685 {
686 	static unsigned long warntime;
687 
688 	if (time_after(jiffies, (warntime + HZ * 60))) {
689 		warntime = jiffies;
690 		printk(KERN_INFO
691 		       "possible SYN flooding on port %d. Sending cookies.\n",
692 		       ntohs(skb->h.th->dest));
693 	}
694 }
695 #endif
696 
697 /*
698  * Save and compile IPv4 options into the request_sock if needed.
699  */
700 static struct ip_options *tcp_v4_save_options(struct sock *sk,
701 					      struct sk_buff *skb)
702 {
703 	struct ip_options *opt = &(IPCB(skb)->opt);
704 	struct ip_options *dopt = NULL;
705 
706 	if (opt && opt->optlen) {
707 		int opt_size = optlength(opt);
708 		dopt = kmalloc(opt_size, GFP_ATOMIC);
709 		if (dopt) {
710 			if (ip_options_echo(dopt, skb)) {
711 				kfree(dopt);
712 				dopt = NULL;
713 			}
714 		}
715 	}
716 	return dopt;
717 }
718 
719 struct request_sock_ops tcp_request_sock_ops = {
720 	.family		=	PF_INET,
721 	.obj_size	=	sizeof(struct tcp_request_sock),
722 	.rtx_syn_ack	=	tcp_v4_send_synack,
723 	.send_ack	=	tcp_v4_reqsk_send_ack,
724 	.destructor	=	tcp_v4_reqsk_destructor,
725 	.send_reset	=	tcp_v4_send_reset,
726 };
727 
728 static struct timewait_sock_ops tcp_timewait_sock_ops = {
729 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
730 	.twsk_unique	= tcp_twsk_unique,
731 };
732 
733 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
734 {
735 	struct inet_request_sock *ireq;
736 	struct tcp_options_received tmp_opt;
737 	struct request_sock *req;
738 	__u32 saddr = skb->nh.iph->saddr;
739 	__u32 daddr = skb->nh.iph->daddr;
740 	__u32 isn = TCP_SKB_CB(skb)->when;
741 	struct dst_entry *dst = NULL;
742 #ifdef CONFIG_SYN_COOKIES
743 	int want_cookie = 0;
744 #else
745 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
746 #endif
747 
748 	/* Never answer SYNs sent to broadcast or multicast */
749 	if (((struct rtable *)skb->dst)->rt_flags &
750 	    (RTCF_BROADCAST | RTCF_MULTICAST))
751 		goto drop;
752 
753 	/* TW buckets are converted to open requests without
754 	 * limitation; they conserve resources and the peer is
755 	 * evidently a real one.
756 	 */
757 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
758 #ifdef CONFIG_SYN_COOKIES
759 		if (sysctl_tcp_syncookies) {
760 			want_cookie = 1;
761 		} else
762 #endif
763 		goto drop;
764 	}
765 
766 	/* Accept backlog is full. If we have already queued enough
767 	 * warm entries in the syn queue, drop the request. That is better
768 	 * than clogging the syn queue with openreqs whose timeouts increase
769 	 * exponentially.
770 	 */
771 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
772 		goto drop;
773 
774 	req = reqsk_alloc(&tcp_request_sock_ops);
775 	if (!req)
776 		goto drop;
777 
778 	tcp_clear_options(&tmp_opt);
779 	tmp_opt.mss_clamp = 536;
780 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
781 
782 	tcp_parse_options(skb, &tmp_opt, 0);
783 
784 	if (want_cookie) {
785 		tcp_clear_options(&tmp_opt);
786 		tmp_opt.saw_tstamp = 0;
787 	}
788 
789 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
790 		/* Some OSes (unknown ones, but I see them on a web server
791 		 * which contains information interesting only for Windows
792 		 * users) do not send their timestamp in the SYN. It is an
793 		 * easy case: we simply do not advertise TS support.
794 		 */
795 		tmp_opt.saw_tstamp = 0;
796 		tmp_opt.tstamp_ok  = 0;
797 	}
798 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
799 
800 	tcp_openreq_init(req, &tmp_opt, skb);
801 
802 	ireq = inet_rsk(req);
803 	ireq->loc_addr = daddr;
804 	ireq->rmt_addr = saddr;
805 	ireq->opt = tcp_v4_save_options(sk, skb);
806 	if (!want_cookie)
807 		TCP_ECN_create_request(req, skb->h.th);
808 
809 	if (want_cookie) {
810 #ifdef CONFIG_SYN_COOKIES
811 		syn_flood_warning(skb);
812 #endif
813 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
814 	} else if (!isn) {
815 		struct inet_peer *peer = NULL;
816 
817 		/* VJ's idea. We save the last timestamp seen
818 		 * from the destination in the peer table when entering
819 		 * TIME-WAIT state, and check against it before
820 		 * accepting a new connection request.
821 		 *
822 		 * If "isn" is not zero, this request hit a live
823 		 * timewait bucket, so all the necessary checks
824 		 * are made in the function processing the timewait state.
825 		 */
826 		if (tmp_opt.saw_tstamp &&
827 		    tcp_death_row.sysctl_tw_recycle &&
828 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
829 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
830 		    peer->v4daddr == saddr) {
831 			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
832 			    (s32)(peer->tcp_ts - req->ts_recent) >
833 							TCP_PAWS_WINDOW) {
834 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
835 				dst_release(dst);
836 				goto drop_and_free;
837 			}
838 		}
839 		/* Kill the following clause if you dislike this approach. */
840 		else if (!sysctl_tcp_syncookies &&
841 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
842 			  (sysctl_max_syn_backlog >> 2)) &&
843 			 (!peer || !peer->tcp_ts_stamp) &&
844 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
845 			/* Without syncookies the last quarter of the
846 			 * backlog is kept for destinations proven
847 			 * to be alive.
848 			 * It means that we continue to communicate
849 			 * with destinations already remembered
850 			 * at the moment of the synflood.
851 			 */
852 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
853 				       "request from %u.%u.%u.%u/%u\n",
854 				       NIPQUAD(saddr),
855 				       ntohs(skb->h.th->source));
856 			dst_release(dst);
857 			goto drop_and_free;
858 		}
859 
860 		isn = tcp_v4_init_sequence(sk, skb);
861 	}
862 	tcp_rsk(req)->snt_isn = isn;
863 
864 	if (tcp_v4_send_synack(sk, req, dst))
865 		goto drop_and_free;
866 
867 	if (want_cookie) {
868 	   	reqsk_free(req);
869 	} else {
870 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
871 	}
872 	return 0;
873 
874 drop_and_free:
875 	reqsk_free(req);
876 drop:
877 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
878 	return 0;
879 }
880 
881 
882 /*
883  * The three-way handshake has completed - we got a valid ACK -
884  * now create the new socket.
885  */
886 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
887 				  struct request_sock *req,
888 				  struct dst_entry *dst)
889 {
890 	struct inet_request_sock *ireq;
891 	struct inet_sock *newinet;
892 	struct tcp_sock *newtp;
893 	struct sock *newsk;
894 
895 	if (sk_acceptq_is_full(sk))
896 		goto exit_overflow;
897 
898 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
899 		goto exit;
900 
901 	newsk = tcp_create_openreq_child(sk, req, skb);
902 	if (!newsk)
903 		goto exit;
904 
905 	newsk->sk_gso_type = SKB_GSO_TCPV4;
906 	sk_setup_caps(newsk, dst);
907 
908 	newtp		      = tcp_sk(newsk);
909 	newinet		      = inet_sk(newsk);
910 	ireq		      = inet_rsk(req);
911 	newinet->daddr	      = ireq->rmt_addr;
912 	newinet->rcv_saddr    = ireq->loc_addr;
913 	newinet->saddr	      = ireq->loc_addr;
914 	newinet->opt	      = ireq->opt;
915 	ireq->opt	      = NULL;
916 	newinet->mc_index     = inet_iif(skb);
917 	newinet->mc_ttl	      = skb->nh.iph->ttl;
918 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
919 	if (newinet->opt)
920 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
921 	newinet->id = newtp->write_seq ^ jiffies;
922 
923 	tcp_mtup_init(newsk);
924 	tcp_sync_mss(newsk, dst_mtu(dst));
925 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
926 	tcp_initialize_rcv_mss(newsk);
927 
928 	__inet_hash(&tcp_hashinfo, newsk, 0);
929 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
930 
931 	return newsk;
932 
933 exit_overflow:
934 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
935 exit:
936 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
937 	dst_release(dst);
938 	return NULL;
939 }
940 
941 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
942 {
943 	struct tcphdr *th = skb->h.th;
944 	struct iphdr *iph = skb->nh.iph;
945 	struct sock *nsk;
946 	struct request_sock **prev;
947 	/* Find possible connection requests. */
948 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
949 						       iph->saddr, iph->daddr);
950 	if (req)
951 		return tcp_check_req(sk, skb, req, prev);
952 
953 	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
954 					th->source, skb->nh.iph->daddr,
955 					ntohs(th->dest), inet_iif(skb));
956 
957 	if (nsk) {
958 		if (nsk->sk_state != TCP_TIME_WAIT) {
959 			bh_lock_sock(nsk);
960 			return nsk;
961 		}
962 		inet_twsk_put((struct inet_timewait_sock *)nsk);
963 		return NULL;
964 	}
965 
966 #ifdef CONFIG_SYN_COOKIES
967 	if (!th->rst && !th->syn && th->ack)
968 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
969 #endif
970 	return sk;
971 }
972 
973 static int tcp_v4_checksum_init(struct sk_buff *skb)
974 {
975 	if (skb->ip_summed == CHECKSUM_HW) {
976 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
977 				  skb->nh.iph->daddr, skb->csum)) {
978 			skb->ip_summed = CHECKSUM_UNNECESSARY;
979 			return 0;
980 		}
981 	}
982 
983 	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
984 				       skb->len, IPPROTO_TCP, 0);
985 
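	/* Short packets are cheap to verify right away; for longer ones the
	 * pseudo-header sum stays in skb->csum and the checksum is completed
	 * later, e.g. while copying data to user space. The 76-byte cutoff
	 * is a heuristic threshold.
	 */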
986 	if (skb->len <= 76) {
987 		return __skb_checksum_complete(skb);
988 	}
989 	return 0;
990 }
991 
992 
993 /* The socket must have its spinlock held when we get
994  * here.
995  *
996  * We have a potential double-lock case here, so even when
997  * doing backlog processing we use the BH locking scheme.
998  * This is because we cannot sleep with the original spinlock
999  * held.
1000  */
1001 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1002 {
1003 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1004 		TCP_CHECK_TIMER(sk);
1005 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1006 			goto reset;
1007 		TCP_CHECK_TIMER(sk);
1008 		return 0;
1009 	}
1010 
1011 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1012 		goto csum_err;
1013 
1014 	if (sk->sk_state == TCP_LISTEN) {
1015 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1016 		if (!nsk)
1017 			goto discard;
1018 
1019 		if (nsk != sk) {
1020 			if (tcp_child_process(sk, nsk, skb))
1021 				goto reset;
1022 			return 0;
1023 		}
1024 	}
1025 
1026 	TCP_CHECK_TIMER(sk);
1027 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1028 		goto reset;
1029 	TCP_CHECK_TIMER(sk);
1030 	return 0;
1031 
1032 reset:
1033 	tcp_v4_send_reset(skb);
1034 discard:
1035 	kfree_skb(skb);
1036 	/* Be careful here. If this function gets more complicated and
1037 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1038 	 * might be destroyed here. This current version compiles correctly,
1039 	 * but you have been warned.
1040 	 */
1041 	return 0;
1042 
1043 csum_err:
1044 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1045 	goto discard;
1046 }
1047 
1048 /*
1049  *	From tcp_input.c
1050  */
1051 
1052 int tcp_v4_rcv(struct sk_buff *skb)
1053 {
1054 	struct tcphdr *th;
1055 	struct sock *sk;
1056 	int ret;
1057 
1058 	if (skb->pkt_type != PACKET_HOST)
1059 		goto discard_it;
1060 
1061 	/* Count it even if it's bad */
1062 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1063 
1064 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1065 		goto discard_it;
1066 
1067 	th = skb->h.th;
1068 
1069 	if (th->doff < sizeof(struct tcphdr) / 4)
1070 		goto bad_packet;
1071 	if (!pskb_may_pull(skb, th->doff * 4))
1072 		goto discard_it;
1073 
1074 	/* An explanation is required here, I think.
1075 	 * Packet length and doff are validated by header prediction,
1076 	 * provided the case of th->doff==0 is eliminated.
1077 	 * So, we defer the checks. */
1078 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1079 	     tcp_v4_checksum_init(skb)))
1080 		goto bad_packet;
1081 
1082 	th = skb->h.th;
1083 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1084 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1085 				    skb->len - th->doff * 4);
1086 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1087 	TCP_SKB_CB(skb)->when	 = 0;
1088 	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
1089 	TCP_SKB_CB(skb)->sacked	 = 0;
1090 
1091 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1092 			   skb->nh.iph->daddr, ntohs(th->dest),
1093 			   inet_iif(skb));
1094 
1095 	if (!sk)
1096 		goto no_tcp_socket;
1097 
1098 process:
1099 	if (sk->sk_state == TCP_TIME_WAIT)
1100 		goto do_time_wait;
1101 
1102 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1103 		goto discard_and_relse;
1104 	nf_reset(skb);
1105 
1106 	if (sk_filter(sk, skb, 0))
1107 		goto discard_and_relse;
1108 
1109 	skb->dev = NULL;
1110 
1111 	bh_lock_sock_nested(sk);
1112 	ret = 0;
1113 	if (!sock_owned_by_user(sk)) {
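		/* When a NET_DMA channel is available the receive copy can be
		 * offloaded to a DMA engine, so process the segment directly
		 * instead of placing it on the prequeue.
		 */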
1114 #ifdef CONFIG_NET_DMA
1115 		struct tcp_sock *tp = tcp_sk(sk);
1116 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1117 			tp->ucopy.dma_chan = get_softnet_dma();
1118 		if (tp->ucopy.dma_chan)
1119 			ret = tcp_v4_do_rcv(sk, skb);
1120 		else
1121 #endif
1122 		{
1123 			if (!tcp_prequeue(sk, skb))
1124 				ret = tcp_v4_do_rcv(sk, skb);
1125 		}
1126 	} else
1127 		sk_add_backlog(sk, skb);
1128 	bh_unlock_sock(sk);
1129 
1130 	sock_put(sk);
1131 
1132 	return ret;
1133 
1134 no_tcp_socket:
1135 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1136 		goto discard_it;
1137 
1138 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1139 bad_packet:
1140 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1141 	} else {
1142 		tcp_v4_send_reset(skb);
1143 	}
1144 
1145 discard_it:
1146 	/* Discard frame. */
1147 	kfree_skb(skb);
1148   	return 0;
1149 
1150 discard_and_relse:
1151 	sock_put(sk);
1152 	goto discard_it;
1153 
1154 do_time_wait:
1155 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1156 		inet_twsk_put((struct inet_timewait_sock *) sk);
1157 		goto discard_it;
1158 	}
1159 
1160 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1161 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1162 		inet_twsk_put((struct inet_timewait_sock *) sk);
1163 		goto discard_it;
1164 	}
1165 	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1166 					   skb, th)) {
1167 	case TCP_TW_SYN: {
1168 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1169 							skb->nh.iph->daddr,
1170 							ntohs(th->dest),
1171 							inet_iif(skb));
1172 		if (sk2) {
1173 			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
1174 					     &tcp_death_row);
1175 			inet_twsk_put((struct inet_timewait_sock *)sk);
1176 			sk = sk2;
1177 			goto process;
1178 		}
1179 		/* Fall through to ACK */
1180 	}
1181 	case TCP_TW_ACK:
1182 		tcp_v4_timewait_ack(sk, skb);
1183 		break;
1184 	case TCP_TW_RST:
1185 		goto no_tcp_socket;
1186 	case TCP_TW_SUCCESS:;
1187 	}
1188 	goto discard_it;
1189 }
1190 
1191 /* VJ's idea. Save the last timestamp seen from this destination
1192  * and hold it at least for the normal timewait interval, to use for
1193  * duplicate segment detection in subsequent connections before they
1194  * enter the synchronized state.
1195  */
1196 
1197 int tcp_v4_remember_stamp(struct sock *sk)
1198 {
1199 	struct inet_sock *inet = inet_sk(sk);
1200 	struct tcp_sock *tp = tcp_sk(sk);
1201 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1202 	struct inet_peer *peer = NULL;
1203 	int release_it = 0;
1204 
1205 	if (!rt || rt->rt_dst != inet->daddr) {
1206 		peer = inet_getpeer(inet->daddr, 1);
1207 		release_it = 1;
1208 	} else {
1209 		if (!rt->peer)
1210 			rt_bind_peer(rt, 1);
1211 		peer = rt->peer;
1212 	}
1213 
1214 	if (peer) {
1215 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1216 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1217 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1218 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1219 			peer->tcp_ts = tp->rx_opt.ts_recent;
1220 		}
1221 		if (release_it)
1222 			inet_putpeer(peer);
1223 		return 1;
1224 	}
1225 
1226 	return 0;
1227 }
1228 
1229 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1230 {
1231 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1232 
1233 	if (peer) {
1234 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1235 
1236 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1237 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1238 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1239 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1240 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1241 		}
1242 		inet_putpeer(peer);
1243 		return 1;
1244 	}
1245 
1246 	return 0;
1247 }
1248 
1249 struct inet_connection_sock_af_ops ipv4_specific = {
1250 	.queue_xmit	   = ip_queue_xmit,
1251 	.send_check	   = tcp_v4_send_check,
1252 	.rebuild_header	   = inet_sk_rebuild_header,
1253 	.conn_request	   = tcp_v4_conn_request,
1254 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1255 	.remember_stamp	   = tcp_v4_remember_stamp,
1256 	.net_header_len	   = sizeof(struct iphdr),
1257 	.setsockopt	   = ip_setsockopt,
1258 	.getsockopt	   = ip_getsockopt,
1259 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1260 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1261 #ifdef CONFIG_COMPAT
1262 	.compat_setsockopt = compat_ip_setsockopt,
1263 	.compat_getsockopt = compat_ip_getsockopt,
1264 #endif
1265 };
1266 
1267 /* NOTE: A lot of things are set to zero explicitly by the call to
1268  *       sk_alloc(), so they need not be done here.
1269  */
1270 static int tcp_v4_init_sock(struct sock *sk)
1271 {
1272 	struct inet_connection_sock *icsk = inet_csk(sk);
1273 	struct tcp_sock *tp = tcp_sk(sk);
1274 
1275 	skb_queue_head_init(&tp->out_of_order_queue);
1276 	tcp_init_xmit_timers(sk);
1277 	tcp_prequeue_init(tp);
1278 
1279 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1280 	tp->mdev = TCP_TIMEOUT_INIT;
1281 
1282 	/* So many TCP implementations out there (incorrectly) count the
1283 	 * initial SYN frame in their delayed-ACK and congestion control
1284 	 * algorithms that we must have the following bandaid to talk
1285 	 * efficiently to them.  -DaveM
1286 	 */
1287 	tp->snd_cwnd = 2;
1288 
1289 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1290 	 * initialization of these values.
1291 	 */
1292 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
1293 	tp->snd_cwnd_clamp = ~0;
1294 	tp->mss_cache = 536;
1295 
1296 	tp->reordering = sysctl_tcp_reordering;
1297 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1298 
1299 	sk->sk_state = TCP_CLOSE;
1300 
1301 	sk->sk_write_space = sk_stream_write_space;
1302 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1303 
1304 	icsk->icsk_af_ops = &ipv4_specific;
1305 	icsk->icsk_sync_mss = tcp_sync_mss;
1306 
1307 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1308 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1309 
1310 	atomic_inc(&tcp_sockets_allocated);
1311 
1312 	return 0;
1313 }
1314 
1315 int tcp_v4_destroy_sock(struct sock *sk)
1316 {
1317 	struct tcp_sock *tp = tcp_sk(sk);
1318 
1319 	tcp_clear_xmit_timers(sk);
1320 
1321 	tcp_cleanup_congestion_control(sk);
1322 
1323 	/* Clean up the write buffer. */
1324   	sk_stream_writequeue_purge(sk);
1325 
1326 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1327   	__skb_queue_purge(&tp->out_of_order_queue);
1328 
1329 #ifdef CONFIG_NET_DMA
1330 	/* Cleans up our sk_async_wait_queue */
1331   	__skb_queue_purge(&sk->sk_async_wait_queue);
1332 #endif
1333 
1334 	/* Clean the prequeue; it really must be empty */
1335 	__skb_queue_purge(&tp->ucopy.prequeue);
1336 
1337 	/* Clean up a referenced TCP bind bucket. */
1338 	if (inet_csk(sk)->icsk_bind_hash)
1339 		inet_put_port(&tcp_hashinfo, sk);
1340 
1341 	/*
1342 	 * If sendmsg cached page exists, toss it.
1343 	 */
1344 	if (sk->sk_sndmsg_page) {
1345 		__free_page(sk->sk_sndmsg_page);
1346 		sk->sk_sndmsg_page = NULL;
1347 	}
1348 
1349 	atomic_dec(&tcp_sockets_allocated);
1350 
1351 	return 0;
1352 }
1353 
1354 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1355 
1356 #ifdef CONFIG_PROC_FS
1357 /* Proc filesystem TCP sock list dumping. */
1358 
1359 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1360 {
1361 	return hlist_empty(head) ? NULL :
1362 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1363 }
1364 
1365 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1366 {
1367 	return tw->tw_node.next ?
1368 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1369 }
1370 
1371 static void *listening_get_next(struct seq_file *seq, void *cur)
1372 {
1373 	struct inet_connection_sock *icsk;
1374 	struct hlist_node *node;
1375 	struct sock *sk = cur;
1376 	struct tcp_iter_state* st = seq->private;
1377 
1378 	if (!sk) {
1379 		st->bucket = 0;
1380 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1381 		goto get_sk;
1382 	}
1383 
1384 	++st->num;
1385 
1386 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1387 		struct request_sock *req = cur;
1388 
1389 	       	icsk = inet_csk(st->syn_wait_sk);
1390 		req = req->dl_next;
1391 		while (1) {
1392 			while (req) {
1393 				if (req->rsk_ops->family == st->family) {
1394 					cur = req;
1395 					goto out;
1396 				}
1397 				req = req->dl_next;
1398 			}
1399 			if (++st->sbucket >= TCP_SYNQ_HSIZE)
1400 				break;
1401 get_req:
1402 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1403 		}
1404 		sk	  = sk_next(st->syn_wait_sk);
1405 		st->state = TCP_SEQ_STATE_LISTENING;
1406 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1407 	} else {
1408 	       	icsk = inet_csk(sk);
1409 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1410 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1411 			goto start_req;
1412 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1413 		sk = sk_next(sk);
1414 	}
1415 get_sk:
1416 	sk_for_each_from(sk, node) {
1417 		if (sk->sk_family == st->family) {
1418 			cur = sk;
1419 			goto out;
1420 		}
1421 	       	icsk = inet_csk(sk);
1422 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1423 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1424 start_req:
1425 			st->uid		= sock_i_uid(sk);
1426 			st->syn_wait_sk = sk;
1427 			st->state	= TCP_SEQ_STATE_OPENREQ;
1428 			st->sbucket	= 0;
1429 			goto get_req;
1430 		}
1431 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1432 	}
1433 	if (++st->bucket < INET_LHTABLE_SIZE) {
1434 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
1435 		goto get_sk;
1436 	}
1437 	cur = NULL;
1438 out:
1439 	return cur;
1440 }
1441 
1442 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1443 {
1444 	void *rc = listening_get_next(seq, NULL);
1445 
1446 	while (rc && *pos) {
1447 		rc = listening_get_next(seq, rc);
1448 		--*pos;
1449 	}
1450 	return rc;
1451 }
1452 
1453 static void *established_get_first(struct seq_file *seq)
1454 {
1455 	struct tcp_iter_state* st = seq->private;
1456 	void *rc = NULL;
1457 
1458 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
1459 		struct sock *sk;
1460 		struct hlist_node *node;
1461 		struct inet_timewait_sock *tw;
1462 
1463 		/* We can reschedule _before_ having picked the target: */
1464 		cond_resched_softirq();
1465 
1466 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
1467 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1468 			if (sk->sk_family != st->family) {
1469 				continue;
1470 			}
1471 			rc = sk;
1472 			goto out;
1473 		}
1474 		st->state = TCP_SEQ_STATE_TIME_WAIT;
1475 		inet_twsk_for_each(tw, node,
1476 				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
1477 			if (tw->tw_family != st->family) {
1478 				continue;
1479 			}
1480 			rc = tw;
1481 			goto out;
1482 		}
1483 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
1484 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1485 	}
1486 out:
1487 	return rc;
1488 }
1489 
1490 static void *established_get_next(struct seq_file *seq, void *cur)
1491 {
1492 	struct sock *sk = cur;
1493 	struct inet_timewait_sock *tw;
1494 	struct hlist_node *node;
1495 	struct tcp_iter_state* st = seq->private;
1496 
1497 	++st->num;
1498 
1499 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
1500 		tw = cur;
1501 		tw = tw_next(tw);
1502 get_tw:
1503 		while (tw && tw->tw_family != st->family) {
1504 			tw = tw_next(tw);
1505 		}
1506 		if (tw) {
1507 			cur = tw;
1508 			goto out;
1509 		}
1510 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
1511 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1512 
1513 		/* We can reschedule between buckets: */
1514 		cond_resched_softirq();
1515 
1516 		if (++st->bucket < tcp_hashinfo.ehash_size) {
1517 			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
1518 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
1519 		} else {
1520 			cur = NULL;
1521 			goto out;
1522 		}
1523 	} else
1524 		sk = sk_next(sk);
1525 
1526 	sk_for_each_from(sk, node) {
1527 		if (sk->sk_family == st->family)
1528 			goto found;
1529 	}
1530 
1531 	st->state = TCP_SEQ_STATE_TIME_WAIT;
1532 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
1533 	goto get_tw;
1534 found:
1535 	cur = sk;
1536 out:
1537 	return cur;
1538 }
1539 
1540 static void *established_get_idx(struct seq_file *seq, loff_t pos)
1541 {
1542 	void *rc = established_get_first(seq);
1543 
1544 	while (rc && pos) {
1545 		rc = established_get_next(seq, rc);
1546 		--pos;
1547 	}
1548 	return rc;
1549 }
1550 
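/* Walk the listening hash first; if the requested position lies beyond it,
 * continue into the established and TIME-WAIT chains.
 */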
1551 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
1552 {
1553 	void *rc;
1554 	struct tcp_iter_state* st = seq->private;
1555 
1556 	inet_listen_lock(&tcp_hashinfo);
1557 	st->state = TCP_SEQ_STATE_LISTENING;
1558 	rc	  = listening_get_idx(seq, &pos);
1559 
1560 	if (!rc) {
1561 		inet_listen_unlock(&tcp_hashinfo);
1562 		local_bh_disable();
1563 		st->state = TCP_SEQ_STATE_ESTABLISHED;
1564 		rc	  = established_get_idx(seq, pos);
1565 	}
1566 
1567 	return rc;
1568 }
1569 
1570 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
1571 {
1572 	struct tcp_iter_state* st = seq->private;
1573 	st->state = TCP_SEQ_STATE_LISTENING;
1574 	st->num = 0;
1575 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1576 }
1577 
1578 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1579 {
1580 	void *rc = NULL;
1581 	struct tcp_iter_state* st;
1582 
1583 	if (v == SEQ_START_TOKEN) {
1584 		rc = tcp_get_idx(seq, 0);
1585 		goto out;
1586 	}
1587 	st = seq->private;
1588 
1589 	switch (st->state) {
1590 	case TCP_SEQ_STATE_OPENREQ:
1591 	case TCP_SEQ_STATE_LISTENING:
1592 		rc = listening_get_next(seq, v);
1593 		if (!rc) {
1594 			inet_listen_unlock(&tcp_hashinfo);
1595 			local_bh_disable();
1596 			st->state = TCP_SEQ_STATE_ESTABLISHED;
1597 			rc	  = established_get_first(seq);
1598 		}
1599 		break;
1600 	case TCP_SEQ_STATE_ESTABLISHED:
1601 	case TCP_SEQ_STATE_TIME_WAIT:
1602 		rc = established_get_next(seq, v);
1603 		break;
1604 	}
1605 out:
1606 	++*pos;
1607 	return rc;
1608 }
1609 
1610 static void tcp_seq_stop(struct seq_file *seq, void *v)
1611 {
1612 	struct tcp_iter_state* st = seq->private;
1613 
1614 	switch (st->state) {
1615 	case TCP_SEQ_STATE_OPENREQ:
1616 		if (v) {
1617 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
1618 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1619 		}
1620 	case TCP_SEQ_STATE_LISTENING:
1621 		if (v != SEQ_START_TOKEN)
1622 			inet_listen_unlock(&tcp_hashinfo);
1623 		break;
1624 	case TCP_SEQ_STATE_TIME_WAIT:
1625 	case TCP_SEQ_STATE_ESTABLISHED:
1626 		if (v)
1627 			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
1628 		local_bh_enable();
1629 		break;
1630 	}
1631 }
1632 
1633 static int tcp_seq_open(struct inode *inode, struct file *file)
1634 {
1635 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1636 	struct seq_file *seq;
1637 	struct tcp_iter_state *s;
1638 	int rc;
1639 
1640 	if (unlikely(afinfo == NULL))
1641 		return -EINVAL;
1642 
1643 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1644 	if (!s)
1645 		return -ENOMEM;
1646 	memset(s, 0, sizeof(*s));
1647 	s->family		= afinfo->family;
1648 	s->seq_ops.start	= tcp_seq_start;
1649 	s->seq_ops.next		= tcp_seq_next;
1650 	s->seq_ops.show		= afinfo->seq_show;
1651 	s->seq_ops.stop		= tcp_seq_stop;
1652 
1653 	rc = seq_open(file, &s->seq_ops);
1654 	if (rc)
1655 		goto out_kfree;
1656 	seq	     = file->private_data;
1657 	seq->private = s;
1658 out:
1659 	return rc;
1660 out_kfree:
1661 	kfree(s);
1662 	goto out;
1663 }
1664 
1665 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
1666 {
1667 	int rc = 0;
1668 	struct proc_dir_entry *p;
1669 
1670 	if (!afinfo)
1671 		return -EINVAL;
1672 	afinfo->seq_fops->owner		= afinfo->owner;
1673 	afinfo->seq_fops->open		= tcp_seq_open;
1674 	afinfo->seq_fops->read		= seq_read;
1675 	afinfo->seq_fops->llseek	= seq_lseek;
1676 	afinfo->seq_fops->release	= seq_release_private;
1677 
1678 	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
1679 	if (p)
1680 		p->data = afinfo;
1681 	else
1682 		rc = -ENOMEM;
1683 	return rc;
1684 }
1685 
1686 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
1687 {
1688 	if (!afinfo)
1689 		return;
1690 	proc_net_remove(afinfo->name);
1691 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
1692 }
1693 
1694 static void get_openreq4(struct sock *sk, struct request_sock *req,
1695 			 char *tmpbuf, int i, int uid)
1696 {
1697 	const struct inet_request_sock *ireq = inet_rsk(req);
1698 	int ttd = req->expires - jiffies;
1699 
1700 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1701 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
1702 		i,
1703 		ireq->loc_addr,
1704 		ntohs(inet_sk(sk)->sport),
1705 		ireq->rmt_addr,
1706 		ntohs(ireq->rmt_port),
1707 		TCP_SYN_RECV,
1708 		0, 0, /* could print option size, but that is af dependent. */
1709 		1,    /* timers active (only the expire timer) */
1710 		jiffies_to_clock_t(ttd),
1711 		req->retrans,
1712 		uid,
1713 		0,  /* non standard timer */
1714 		0, /* open_requests have no inode */
1715 		atomic_read(&sk->sk_refcnt),
1716 		req);
1717 }
1718 
1719 static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
1720 {
1721 	int timer_active;
1722 	unsigned long timer_expires;
1723 	struct tcp_sock *tp = tcp_sk(sp);
1724 	const struct inet_connection_sock *icsk = inet_csk(sp);
1725 	struct inet_sock *inet = inet_sk(sp);
1726 	unsigned int dest = inet->daddr;
1727 	unsigned int src = inet->rcv_saddr;
1728 	__u16 destp = ntohs(inet->dport);
1729 	__u16 srcp = ntohs(inet->sport);
1730 
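	/* Timer codes reported in /proc/net/tcp: 1 retransmit, 2 keepalive
	 * (sk_timer), 4 zero-window probe, 0 none; 3 is used for TIME-WAIT
	 * sockets in get_timewait4_sock().
	 */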
1731 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1732 		timer_active	= 1;
1733 		timer_expires	= icsk->icsk_timeout;
1734 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1735 		timer_active	= 4;
1736 		timer_expires	= icsk->icsk_timeout;
1737 	} else if (timer_pending(&sp->sk_timer)) {
1738 		timer_active	= 2;
1739 		timer_expires	= sp->sk_timer.expires;
1740 	} else {
1741 		timer_active	= 0;
1742 		timer_expires = jiffies;
1743 	}
1744 
1745 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
1746 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
1747 		i, src, srcp, dest, destp, sp->sk_state,
1748 		tp->write_seq - tp->snd_una,
1749 		(sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1750 		timer_active,
1751 		jiffies_to_clock_t(timer_expires - jiffies),
1752 		icsk->icsk_retransmits,
1753 		sock_i_uid(sp),
1754 		icsk->icsk_probes_out,
1755 		sock_i_ino(sp),
1756 		atomic_read(&sp->sk_refcnt), sp,
1757 		icsk->icsk_rto,
1758 		icsk->icsk_ack.ato,
1759 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1760 		tp->snd_cwnd,
1761 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
1762 }
1763 
1764 static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
1765 {
1766 	unsigned int dest, src;
1767 	__u16 destp, srcp;
1768 	int ttd = tw->tw_ttd - jiffies;
1769 
1770 	if (ttd < 0)
1771 		ttd = 0;
1772 
1773 	dest  = tw->tw_daddr;
1774 	src   = tw->tw_rcv_saddr;
1775 	destp = ntohs(tw->tw_dport);
1776 	srcp  = ntohs(tw->tw_sport);
1777 
1778 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1779 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
1780 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
1781 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1782 		atomic_read(&tw->tw_refcnt), tw);
1783 }
1784 
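/* Scratch buffer size for one formatted line of /proc/net/tcp. */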
1785 #define TMPSZ 150
1786 
1787 static int tcp4_seq_show(struct seq_file *seq, void *v)
1788 {
1789 	struct tcp_iter_state* st;
1790 	char tmpbuf[TMPSZ + 1];
1791 
1792 	if (v == SEQ_START_TOKEN) {
1793 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
1794 			   "  sl  local_address rem_address   st tx_queue "
1795 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
1796 			   "inode");
1797 		goto out;
1798 	}
1799 	st = seq->private;
1800 
1801 	switch (st->state) {
1802 	case TCP_SEQ_STATE_LISTENING:
1803 	case TCP_SEQ_STATE_ESTABLISHED:
1804 		get_tcp4_sock(v, tmpbuf, st->num);
1805 		break;
1806 	case TCP_SEQ_STATE_OPENREQ:
1807 		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
1808 		break;
1809 	case TCP_SEQ_STATE_TIME_WAIT:
1810 		get_timewait4_sock(v, tmpbuf, st->num);
1811 		break;
1812 	}
1813 	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
1814 out:
1815 	return 0;
1816 }
1817 
1818 static struct file_operations tcp4_seq_fops;
1819 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1820 	.owner		= THIS_MODULE,
1821 	.name		= "tcp",
1822 	.family		= AF_INET,
1823 	.seq_show	= tcp4_seq_show,
1824 	.seq_fops	= &tcp4_seq_fops,
1825 };
1826 
1827 int __init tcp4_proc_init(void)
1828 {
1829 	return tcp_proc_register(&tcp4_seq_afinfo);
1830 }
1831 
1832 void tcp4_proc_exit(void)
1833 {
1834 	tcp_proc_unregister(&tcp4_seq_afinfo);
1835 }
1836 #endif /* CONFIG_PROC_FS */
1837 
1838 struct proto tcp_prot = {
1839 	.name			= "TCP",
1840 	.owner			= THIS_MODULE,
1841 	.close			= tcp_close,
1842 	.connect		= tcp_v4_connect,
1843 	.disconnect		= tcp_disconnect,
1844 	.accept			= inet_csk_accept,
1845 	.ioctl			= tcp_ioctl,
1846 	.init			= tcp_v4_init_sock,
1847 	.destroy		= tcp_v4_destroy_sock,
1848 	.shutdown		= tcp_shutdown,
1849 	.setsockopt		= tcp_setsockopt,
1850 	.getsockopt		= tcp_getsockopt,
1851 	.sendmsg		= tcp_sendmsg,
1852 	.recvmsg		= tcp_recvmsg,
1853 	.backlog_rcv		= tcp_v4_do_rcv,
1854 	.hash			= tcp_v4_hash,
1855 	.unhash			= tcp_unhash,
1856 	.get_port		= tcp_v4_get_port,
1857 	.enter_memory_pressure	= tcp_enter_memory_pressure,
1858 	.sockets_allocated	= &tcp_sockets_allocated,
1859 	.orphan_count		= &tcp_orphan_count,
1860 	.memory_allocated	= &tcp_memory_allocated,
1861 	.memory_pressure	= &tcp_memory_pressure,
1862 	.sysctl_mem		= sysctl_tcp_mem,
1863 	.sysctl_wmem		= sysctl_tcp_wmem,
1864 	.sysctl_rmem		= sysctl_tcp_rmem,
1865 	.max_header		= MAX_TCP_HEADER,
1866 	.obj_size		= sizeof(struct tcp_sock),
1867 	.twsk_prot		= &tcp_timewait_sock_ops,
1868 	.rsk_prot		= &tcp_request_sock_ops,
1869 #ifdef CONFIG_COMPAT
1870 	.compat_setsockopt	= compat_tcp_setsockopt,
1871 	.compat_getsockopt	= compat_tcp_getsockopt,
1872 #endif
1873 };
1874 
1875 void __init tcp_v4_init(struct net_proto_family *ops)
1876 {
1877 	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, IPPROTO_TCP) < 0)
1878 		panic("Failed to create the TCP control socket.\n");
1879 }
1880 
1881 EXPORT_SYMBOL(ipv4_specific);
1882 EXPORT_SYMBOL(tcp_hashinfo);
1883 EXPORT_SYMBOL(tcp_prot);
1884 EXPORT_SYMBOL(tcp_unhash);
1885 EXPORT_SYMBOL(tcp_v4_conn_request);
1886 EXPORT_SYMBOL(tcp_v4_connect);
1887 EXPORT_SYMBOL(tcp_v4_do_rcv);
1888 EXPORT_SYMBOL(tcp_v4_remember_stamp);
1889 EXPORT_SYMBOL(tcp_v4_send_check);
1890 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1891 
1892 #ifdef CONFIG_PROC_FS
1893 EXPORT_SYMBOL(tcp_proc_register);
1894 EXPORT_SYMBOL(tcp_proc_unregister);
1895 #endif
1896 EXPORT_SYMBOL(sysctl_local_port_range);
1897 EXPORT_SYMBOL(sysctl_tcp_low_latency);
1898 
1899