xref: /linux/net/ipv4/tcp_ipv4.c (revision a7edd0e676d51145ae634a2acf7a447e319200fa)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9  *
10  *		IPv4 specific functions
11  *
12  *
13  *		code split from:
14  *		linux/ipv4/tcp.c
15  *		linux/ipv4/tcp_input.c
16  *		linux/ipv4/tcp_output.c
17  *
18  *		See tcp.c for author information
19  *
20  *	This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  */
25 
26 /*
27  * Changes:
28  *		David S. Miller	:	New socket lookup architecture.
29  *					This code is dedicated to John Dyson.
30  *		David S. Miller :	Change semantics of established hash,
31  *					half is devoted to TIME_WAIT sockets
32  *					and the rest go in the other half.
33  *		Andi Kleen :		Add support for syncookies and fixed
34  *					some bugs: ip options weren't passed to
35  *					the TCP layer, missed a check for an
36  *					ACK bit.
37  *		Andi Kleen :		Implemented fast path mtu discovery.
38  *	     				Fixed many serious bugs in the
39  *					request_sock handling and moved
40  *					most of it into the af independent code.
41  *					Added tail drop and some other bugfixes.
42  *					Added new listen semantics.
43  *		Mike McLagan	:	Routing by source
44  *	Juan Jose Ciarlante:		ip_dynaddr bits
45  *		Andi Kleen:		various fixes.
46  *	Vitaly E. Lavrov	:	Transparent proxy revived after a year
47  *					in a coma.
48  *	Andi Kleen		:	Fix new listen.
49  *	Andi Kleen		:	Fix accept error reporting.
50  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
51  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
52  *					a single port at the same time.
53  */
54 
55 
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 
65 #include <net/icmp.h>
66 #include <net/inet_hashtables.h>
67 #include <net/tcp.h>
68 #include <net/transp_v6.h>
69 #include <net/ipv6.h>
70 #include <net/inet_common.h>
71 #include <net/timewait_sock.h>
72 #include <net/xfrm.h>
73 #include <net/netdma.h>
74 
75 #include <linux/inet.h>
76 #include <linux/ipv6.h>
77 #include <linux/stddef.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 
81 #include <linux/crypto.h>
82 #include <linux/scatterlist.h>
83 
84 int sysctl_tcp_tw_reuse __read_mostly;
85 int sysctl_tcp_low_latency __read_mostly;
86 
87 /* Check TCP sequence numbers in ICMP packets. */
88 #define ICMP_MIN_LENGTH 8
89 
90 /* Socket used for sending RSTs */
91 static struct socket *tcp_socket;
92 
93 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
94 
95 #ifdef CONFIG_TCP_MD5SIG
96 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
97 						   __be32 addr);
98 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
99 				   __be32 saddr, __be32 daddr,
100 				   struct tcphdr *th, int protocol,
101 				   int tcplen);
102 #endif
103 
104 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
105 	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
106 	.lhash_users = ATOMIC_INIT(0),
107 	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
108 };
109 
110 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
111 {
112 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
113 				 inet_csk_bind_conflict);
114 }
115 
116 static void tcp_v4_hash(struct sock *sk)
117 {
118 	inet_hash(&tcp_hashinfo, sk);
119 }
120 
121 void tcp_unhash(struct sock *sk)
122 {
123 	inet_unhash(&tcp_hashinfo, sk);
124 }
125 
126 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
127 {
128 	return secure_tcp_sequence_number(skb->nh.iph->daddr,
129 					  skb->nh.iph->saddr,
130 					  skb->h.th->dest,
131 					  skb->h.th->source);
132 }
133 
134 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
135 {
136 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
137 	struct tcp_sock *tp = tcp_sk(sk);
138 
139 	/* With PAWS, it is safe from the viewpoint
140 	   of data integrity. Even without PAWS it is safe provided sequence
141 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
142 
143 	   Actually, the idea is close to VJ's one, only timestamp cache is
144 	   held not per host, but per port pair and TW bucket is used as state
145 	   holder.
146 
147 	   If TW bucket has been already destroyed we fall back to VJ's scheme
148 	   and use initial timestamp retrieved from peer table.
149 	 */
150 	if (tcptw->tw_ts_recent_stamp &&
151 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
152 			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
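		/* Start the new write_seq beyond anything the old TIME-WAIT
		 * connection could have sent (snd_nxt plus a maximal 64K
		 * window), so the two sequence spaces cannot overlap.
		 */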
153 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
154 		if (tp->write_seq == 0)
155 			tp->write_seq = 1;
156 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
157 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
158 		sock_hold(sktw);
159 		return 1;
160 	}
161 
162 	return 0;
163 }
164 
165 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
166 
167 /* This will initiate an outgoing connection. */
168 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
169 {
170 	struct inet_sock *inet = inet_sk(sk);
171 	struct tcp_sock *tp = tcp_sk(sk);
172 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
173 	struct rtable *rt;
174 	__be32 daddr, nexthop;
175 	int tmp;
176 	int err;
177 
178 	if (addr_len < sizeof(struct sockaddr_in))
179 		return -EINVAL;
180 
181 	if (usin->sin_family != AF_INET)
182 		return -EAFNOSUPPORT;
183 
184 	nexthop = daddr = usin->sin_addr.s_addr;
185 	if (inet->opt && inet->opt->srr) {
186 		if (!daddr)
187 			return -EINVAL;
188 		nexthop = inet->opt->faddr;
189 	}
190 
191 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
192 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
193 			       IPPROTO_TCP,
194 			       inet->sport, usin->sin_port, sk, 1);
195 	if (tmp < 0)
196 		return tmp;
197 
198 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
199 		ip_rt_put(rt);
200 		return -ENETUNREACH;
201 	}
202 
203 	if (!inet->opt || !inet->opt->srr)
204 		daddr = rt->rt_dst;
205 
206 	if (!inet->saddr)
207 		inet->saddr = rt->rt_src;
208 	inet->rcv_saddr = inet->saddr;
209 
210 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
211 		/* Reset inherited state */
212 		tp->rx_opt.ts_recent	   = 0;
213 		tp->rx_opt.ts_recent_stamp = 0;
214 		tp->write_seq		   = 0;
215 	}
216 
217 	if (tcp_death_row.sysctl_tw_recycle &&
218 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
219 		struct inet_peer *peer = rt_get_peer(rt);
220 		/*
221 		 * VJ's idea. We save last timestamp seen from
222 		 * the destination in peer table, when entering state
223 		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
224 		 * when trying new connection.
225 		 */
226 		if (peer != NULL &&
227 		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
228 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
229 			tp->rx_opt.ts_recent = peer->tcp_ts;
230 		}
231 	}
232 
233 	inet->dport = usin->sin_port;
234 	inet->daddr = daddr;
235 
236 	inet_csk(sk)->icsk_ext_hdr_len = 0;
237 	if (inet->opt)
238 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
239 
240 	tp->rx_opt.mss_clamp = 536;
241 
242 	/* Socket identity is still unknown (sport may be zero).
243 	 * However we set state to SYN-SENT and, without releasing the socket
244 	 * lock, select a source port, enter ourselves into the hash tables and
245 	 * complete initialization after this.
246 	 */
247 	tcp_set_state(sk, TCP_SYN_SENT);
248 	err = inet_hash_connect(&tcp_death_row, sk);
249 	if (err)
250 		goto failure;
251 
252 	err = ip_route_newports(&rt, IPPROTO_TCP,
253 				inet->sport, inet->dport, sk);
254 	if (err)
255 		goto failure;
256 
257 	/* OK, now commit destination to socket.  */
258 	sk->sk_gso_type = SKB_GSO_TCPV4;
259 	sk_setup_caps(sk, &rt->u.dst);
260 
261 	if (!tp->write_seq)
262 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
263 							   inet->daddr,
264 							   inet->sport,
265 							   usin->sin_port);
266 
267 	inet->id = tp->write_seq ^ jiffies;
268 
269 	err = tcp_connect(sk);
270 	rt = NULL;
271 	if (err)
272 		goto failure;
273 
274 	return 0;
275 
276 failure:
277 	/*
278 	 * This unhashes the socket and releases the local port,
279 	 * if necessary.
280 	 */
281 	tcp_set_state(sk, TCP_CLOSE);
282 	ip_rt_put(rt);
283 	sk->sk_route_caps = 0;
284 	inet->dport = 0;
285 	return err;
286 }
287 
288 /*
289  * This routine does path mtu discovery as defined in RFC1191.
290  */
291 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
292 {
293 	struct dst_entry *dst;
294 	struct inet_sock *inet = inet_sk(sk);
295 
296 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
297 	 * sent out by Linux are always < 576 bytes so they should go through
298 	 * unfragmented).
299 	 */
300 	if (sk->sk_state == TCP_LISTEN)
301 		return;
302 
303 	/* We don't check in the dst entry if pmtu discovery is forbidden
304 	 * on this route. We just assume that no packet-too-big packets
305 	 * are sent back when pmtu discovery is not active.
306 	 * There is a small race when the user changes this flag in the
307 	 * route, but I think that's acceptable.
308 	 */
309 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
310 		return;
311 
312 	dst->ops->update_pmtu(dst, mtu);
313 
314 	/* Something is about to go wrong... Remember the soft error
315 	 * in case this connection is not able to recover.
316 	 */
317 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
318 		sk->sk_err_soft = EMSGSIZE;
319 
320 	mtu = dst_mtu(dst);
321 
322 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
323 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
324 		tcp_sync_mss(sk, mtu);
325 
326 		/* Resend the TCP packet because it's
327 		 * clear that the old packet has been
328 		 * dropped. This is the new "fast" path mtu
329 		 * discovery.
330 		 */
331 		tcp_simple_retransmit(sk);
332 	} /* else let the usual retransmit timer handle it */
333 }
334 
335 /*
336  * This routine is called by the ICMP module when it gets some
337  * sort of error condition.  If err < 0 then the socket should
338  * be closed and the error returned to the user.  If err > 0
339  * it's just the icmp type << 8 | icmp code.  After adjustment
340  * header points to the first 8 bytes of the tcp header.  We need
341  * to find the appropriate port.
342  *
343  * The locking strategy used here is very "optimistic". When
344  * someone else accesses the socket the ICMP is just dropped
345  * and for some paths there is no check at all.
346  * A more general error queue to queue errors for later handling
347  * is probably better.
348  *
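 * For ICMP_FRAG_NEEDED errors, "info" carries the next-hop MTU reported
 * by the router; it is passed straight through to do_pmtu_discovery()
 * below.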
349  */
350 
351 void tcp_v4_err(struct sk_buff *skb, u32 info)
352 {
353 	struct iphdr *iph = (struct iphdr *)skb->data;
354 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
355 	struct tcp_sock *tp;
356 	struct inet_sock *inet;
357 	int type = skb->h.icmph->type;
358 	int code = skb->h.icmph->code;
359 	struct sock *sk;
360 	__u32 seq;
361 	int err;
362 
363 	if (skb->len < (iph->ihl << 2) + 8) {
364 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
365 		return;
366 	}
367 
368 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
369 			 th->source, inet_iif(skb));
370 	if (!sk) {
371 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
372 		return;
373 	}
374 	if (sk->sk_state == TCP_TIME_WAIT) {
375 		inet_twsk_put(inet_twsk(sk));
376 		return;
377 	}
378 
379 	bh_lock_sock(sk);
380 	/* If too many ICMPs get dropped on busy
381 	 * servers this needs to be solved differently.
382 	 */
383 	if (sock_owned_by_user(sk))
384 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
385 
386 	if (sk->sk_state == TCP_CLOSE)
387 		goto out;
388 
389 	tp = tcp_sk(sk);
390 	seq = ntohl(th->seq);
391 	if (sk->sk_state != TCP_LISTEN &&
392 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
393 		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
394 		goto out;
395 	}
396 
397 	switch (type) {
398 	case ICMP_SOURCE_QUENCH:
399 		/* Just silently ignore these. */
400 		goto out;
401 	case ICMP_PARAMETERPROB:
402 		err = EPROTO;
403 		break;
404 	case ICMP_DEST_UNREACH:
405 		if (code > NR_ICMP_UNREACH)
406 			goto out;
407 
408 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
409 			if (!sock_owned_by_user(sk))
410 				do_pmtu_discovery(sk, iph, info);
411 			goto out;
412 		}
413 
414 		err = icmp_err_convert[code].errno;
415 		break;
416 	case ICMP_TIME_EXCEEDED:
417 		err = EHOSTUNREACH;
418 		break;
419 	default:
420 		goto out;
421 	}
422 
423 	switch (sk->sk_state) {
424 		struct request_sock *req, **prev;
425 	case TCP_LISTEN:
426 		if (sock_owned_by_user(sk))
427 			goto out;
428 
429 		req = inet_csk_search_req(sk, &prev, th->dest,
430 					  iph->daddr, iph->saddr);
431 		if (!req)
432 			goto out;
433 
434 		/* ICMPs are not backlogged, hence we cannot get
435 		   an established socket here.
436 		 */
437 		BUG_TRAP(!req->sk);
438 
439 		if (seq != tcp_rsk(req)->snt_isn) {
440 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
441 			goto out;
442 		}
443 
444 		/*
445 		 * Still in SYN_RECV, just remove it silently.
446 		 * There is no good way to pass the error to the newly
447 		 * created socket, and POSIX does not want network
448 		 * errors returned from accept().
449 		 */
450 		inet_csk_reqsk_queue_drop(sk, req, prev);
451 		goto out;
452 
453 	case TCP_SYN_SENT:
454 	case TCP_SYN_RECV:  /* Normally cannot happen.
455 			       It can, e.g., if SYNs crossed.
456 			     */
457 		if (!sock_owned_by_user(sk)) {
458 			sk->sk_err = err;
459 
460 			sk->sk_error_report(sk);
461 
462 			tcp_done(sk);
463 		} else {
464 			sk->sk_err_soft = err;
465 		}
466 		goto out;
467 	}
468 
469 	/* If we've already connected we will keep trying
470 	 * until we time out, or the user gives up.
471 	 *
472 	 * rfc1122 4.2.3.9 allows us to consider as hard errors
473 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
474 	 * but it is obsoleted by pmtu discovery).
475 	 *
476 	 * Note that in the modern internet, where routing is unreliable
477 	 * and in each dark corner broken firewalls sit, sending random
478 	 * errors ordered by their masters, even these two messages finally lose
479 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
480 	 *
481 	 * Now we are in compliance with RFCs.
482 	 *							--ANK (980905)
483 	 */
484 
485 	inet = inet_sk(sk);
486 	if (!sock_owned_by_user(sk) && inet->recverr) {
487 		sk->sk_err = err;
488 		sk->sk_error_report(sk);
489 	} else	{ /* Only an error on timeout */
490 		sk->sk_err_soft = err;
491 	}
492 
493 out:
494 	bh_unlock_sock(sk);
495 	sock_put(sk);
496 }
497 
498 /* This routine computes an IPv4 TCP checksum. */
499 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
500 {
501 	struct inet_sock *inet = inet_sk(sk);
502 	struct tcphdr *th = skb->h.th;
503 
504 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
505 		th->check = ~tcp_v4_check(len, inet->saddr,
506 					  inet->daddr, 0);
507 		skb->csum_offset = offsetof(struct tcphdr, check);
508 	} else {
509 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
510 					 csum_partial((char *)th,
511 						      th->doff << 2,
512 						      skb->csum));
513 	}
514 }
515 
516 int tcp_v4_gso_send_check(struct sk_buff *skb)
517 {
518 	struct iphdr *iph;
519 	struct tcphdr *th;
520 
521 	if (!pskb_may_pull(skb, sizeof(*th)))
522 		return -EINVAL;
523 
524 	iph = skb->nh.iph;
525 	th = skb->h.th;
526 
527 	th->check = 0;
528 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
529 	skb->csum_offset = offsetof(struct tcphdr, check);
530 	skb->ip_summed = CHECKSUM_PARTIAL;
531 	return 0;
532 }
533 
534 /*
535  *	This routine will send an RST to the other tcp.
536  *
537  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
538  *		      for the reset.
539  *	Answer: if a packet caused the RST, it is not for a socket
540  *		existing in our system; if it is matched to a socket,
541  *		it is just a duplicate segment or a bug in the other side's TCP.
542  *		So we build the reply based only on parameters that
543  *		arrived with the segment.
544  *	Exception: precedence violation. We do not implement it in any case.
545  */
546 
547 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
548 {
549 	struct tcphdr *th = skb->h.th;
550 	struct {
551 		struct tcphdr th;
552 #ifdef CONFIG_TCP_MD5SIG
553 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
554 #endif
555 	} rep;
556 	struct ip_reply_arg arg;
557 #ifdef CONFIG_TCP_MD5SIG
558 	struct tcp_md5sig_key *key;
559 #endif
560 
561 	/* Never send a reset in response to a reset. */
562 	if (th->rst)
563 		return;
564 
565 	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
566 		return;
567 
568 	/* Swap the send and the receive. */
569 	memset(&rep, 0, sizeof(rep));
570 	rep.th.dest   = th->source;
571 	rep.th.source = th->dest;
572 	rep.th.doff   = sizeof(struct tcphdr) / 4;
573 	rep.th.rst    = 1;
574 
575 	if (th->ack) {
576 		rep.th.seq = th->ack_seq;
577 	} else {
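		/* ACK everything the offending segment occupied in sequence
		 * space: its payload plus one each for SYN and FIN, if set.
		 */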
578 		rep.th.ack = 1;
579 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
580 				       skb->len - (th->doff << 2));
581 	}
582 
583 	memset(&arg, 0, sizeof(arg));
584 	arg.iov[0].iov_base = (unsigned char *)&rep;
585 	arg.iov[0].iov_len  = sizeof(rep.th);
586 
587 #ifdef CONFIG_TCP_MD5SIG
588 	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
589 	if (key) {
590 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
591 				   (TCPOPT_NOP << 16) |
592 				   (TCPOPT_MD5SIG << 8) |
593 				   TCPOLEN_MD5SIG);
594 		/* Update length and the length the header thinks exists */
595 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
596 		rep.th.doff = arg.iov[0].iov_len / 4;
597 
598 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
599 					key,
600 					skb->nh.iph->daddr,
601 					skb->nh.iph->saddr,
602 					&rep.th, IPPROTO_TCP,
603 					arg.iov[0].iov_len);
604 	}
605 #endif
606 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
607 				      skb->nh.iph->saddr, /* XXX */
608 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
609 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
610 
611 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
612 
613 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
614 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
615 }
616 
617 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
618    outside of socket context, is certainly ugly. What can I do?
619  */
620 
621 static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
622 			    struct sk_buff *skb, u32 seq, u32 ack,
623 			    u32 win, u32 ts)
624 {
625 	struct tcphdr *th = skb->h.th;
626 	struct {
627 		struct tcphdr th;
628 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
629 #ifdef CONFIG_TCP_MD5SIG
630 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
631 #endif
632 			];
633 	} rep;
634 	struct ip_reply_arg arg;
635 #ifdef CONFIG_TCP_MD5SIG
636 	struct tcp_md5sig_key *key;
637 	struct tcp_md5sig_key tw_key;
638 #endif
639 
640 	memset(&rep.th, 0, sizeof(struct tcphdr));
641 	memset(&arg, 0, sizeof(arg));
642 
643 	arg.iov[0].iov_base = (unsigned char *)&rep;
644 	arg.iov[0].iov_len  = sizeof(rep.th);
645 	if (ts) {
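		/* Option layout: two NOPs for alignment, then the TIMESTAMP
		 * kind/length bytes, followed by our current tcp_time_stamp
		 * (TSval) and the peer's last timestamp echoed back (TSecr).
		 */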
646 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
647 				   (TCPOPT_TIMESTAMP << 8) |
648 				   TCPOLEN_TIMESTAMP);
649 		rep.opt[1] = htonl(tcp_time_stamp);
650 		rep.opt[2] = htonl(ts);
651 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
652 	}
653 
654 	/* Swap the send and the receive. */
655 	rep.th.dest    = th->source;
656 	rep.th.source  = th->dest;
657 	rep.th.doff    = arg.iov[0].iov_len / 4;
658 	rep.th.seq     = htonl(seq);
659 	rep.th.ack_seq = htonl(ack);
660 	rep.th.ack     = 1;
661 	rep.th.window  = htons(win);
662 
663 #ifdef CONFIG_TCP_MD5SIG
664 	/*
665 	 * The SKB holds an incoming packet, but may not have a valid ->sk
666 	 * pointer. This is especially the case when we're dealing with a
667 	 * TIME_WAIT ack, because the sk structure is long gone, and only
668 	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
669 	 * structure, and we use it in preference.  I believe that (twsk ||
670 	 * skb->sk) holds true, but we program defensively.
671 	 */
672 	if (!twsk && skb->sk) {
673 		key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
674 	} else if (twsk && twsk->tw_md5_keylen) {
675 		tw_key.key = twsk->tw_md5_key;
676 		tw_key.keylen = twsk->tw_md5_keylen;
677 		key = &tw_key;
678 	} else
679 		key = NULL;
680 
681 	if (key) {
682 		int offset = (ts) ? 3 : 0;
683 
684 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
685 					  (TCPOPT_NOP << 16) |
686 					  (TCPOPT_MD5SIG << 8) |
687 					  TCPOLEN_MD5SIG);
688 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
689 		rep.th.doff = arg.iov[0].iov_len/4;
690 
691 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
692 					key,
693 					skb->nh.iph->daddr,
694 					skb->nh.iph->saddr,
695 					&rep.th, IPPROTO_TCP,
696 					arg.iov[0].iov_len);
697 	}
698 #endif
699 	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
700 				      skb->nh.iph->saddr, /* XXX */
701 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
702 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
703 
704 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
705 
706 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
707 }
708 
709 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
710 {
711 	struct inet_timewait_sock *tw = inet_twsk(sk);
712 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
713 
714 	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
715 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
716 			tcptw->tw_ts_recent);
717 
718 	inet_twsk_put(tw);
719 }
720 
721 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
722 				  struct request_sock *req)
723 {
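	/* Our SYN-ACK consumed one sequence number, so ACK from snt_isn + 1;
	 * likewise, acknowledge the peer's SYN with rcv_isn + 1.
	 */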
724 	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
725 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
726 			req->ts_recent);
727 }
728 
729 /*
730  *	Send a SYN-ACK after having received an ACK.
731  *	This still operates on a request_sock only, not on a big
732  *	socket.
733  */
734 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
735 			      struct dst_entry *dst)
736 {
737 	const struct inet_request_sock *ireq = inet_rsk(req);
738 	int err = -1;
739 	struct sk_buff * skb;
740 
741 	/* First, grab a route. */
742 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
743 		goto out;
744 
745 	skb = tcp_make_synack(sk, dst, req);
746 
747 	if (skb) {
748 		struct tcphdr *th = skb->h.th;
749 
750 		th->check = tcp_v4_check(skb->len,
751 					 ireq->loc_addr,
752 					 ireq->rmt_addr,
753 					 csum_partial((char *)th, skb->len,
754 						      skb->csum));
755 
756 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
757 					    ireq->rmt_addr,
758 					    ireq->opt);
759 		err = net_xmit_eval(err);
760 	}
761 
762 out:
763 	dst_release(dst);
764 	return err;
765 }
766 
767 /*
768  *	IPv4 request_sock destructor.
769  */
770 static void tcp_v4_reqsk_destructor(struct request_sock *req)
771 {
772 	kfree(inet_rsk(req)->opt);
773 }
774 
775 #ifdef CONFIG_SYN_COOKIES
776 static void syn_flood_warning(struct sk_buff *skb)
777 {
778 	static unsigned long warntime;
779 
780 	if (time_after(jiffies, (warntime + HZ * 60))) {
781 		warntime = jiffies;
782 		printk(KERN_INFO
783 		       "possible SYN flooding on port %d. Sending cookies.\n",
784 		       ntohs(skb->h.th->dest));
785 	}
786 }
787 #endif
788 
789 /*
790  * Save and compile IPv4 options into the request_sock if needed.
791  */
792 static struct ip_options *tcp_v4_save_options(struct sock *sk,
793 					      struct sk_buff *skb)
794 {
795 	struct ip_options *opt = &(IPCB(skb)->opt);
796 	struct ip_options *dopt = NULL;
797 
798 	if (opt && opt->optlen) {
799 		int opt_size = optlength(opt);
800 		dopt = kmalloc(opt_size, GFP_ATOMIC);
801 		if (dopt) {
802 			if (ip_options_echo(dopt, skb)) {
803 				kfree(dopt);
804 				dopt = NULL;
805 			}
806 		}
807 	}
808 	return dopt;
809 }
810 
811 #ifdef CONFIG_TCP_MD5SIG
812 /*
813  * RFC2385 MD5 checksumming requires a mapping of
814  * IP address->MD5 Key.
815  * We need to maintain these in the sk structure.
816  */
817 
818 /* Find the Key structure for an address.  */
819 static struct tcp_md5sig_key *
820 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
821 {
822 	struct tcp_sock *tp = tcp_sk(sk);
823 	int i;
824 
825 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
826 		return NULL;
827 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
828 		if (tp->md5sig_info->keys4[i].addr == addr)
829 			return (struct tcp_md5sig_key *)
830 						&tp->md5sig_info->keys4[i];
831 	}
832 	return NULL;
833 }
834 
835 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
836 					 struct sock *addr_sk)
837 {
838 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
839 }
840 
841 EXPORT_SYMBOL(tcp_v4_md5_lookup);
842 
843 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
844 						      struct request_sock *req)
845 {
846 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
847 }
848 
849 /* This can be called on a newly created socket, from other files */
850 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
851 		      u8 *newkey, u8 newkeylen)
852 {
853 	/* Add Key to the list */
854 	struct tcp4_md5sig_key *key;
855 	struct tcp_sock *tp = tcp_sk(sk);
856 	struct tcp4_md5sig_key *keys;
857 
858 	key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
859 	if (key) {
860 		/* Pre-existing entry - just update that one. */
861 		kfree(key->key);
862 		key->key = newkey;
863 		key->keylen = newkeylen;
864 	} else {
865 		struct tcp_md5sig_info *md5sig;
866 
867 		if (!tp->md5sig_info) {
868 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
869 						  GFP_ATOMIC);
870 			if (!tp->md5sig_info) {
871 				kfree(newkey);
872 				return -ENOMEM;
873 			}
874 		}
875 		if (tcp_alloc_md5sig_pool() == NULL) {
876 			kfree(newkey);
877 			return -ENOMEM;
878 		}
879 		md5sig = tp->md5sig_info;
880 
881 		if (md5sig->alloced4 == md5sig->entries4) {
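			/* The key array is full: grow it by one slot, copy
			 * the existing entries across and free the old array.
			 */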
882 			keys = kmalloc((sizeof(*keys) *
883 					(md5sig->entries4 + 1)), GFP_ATOMIC);
884 			if (!keys) {
885 				kfree(newkey);
886 				tcp_free_md5sig_pool();
887 				return -ENOMEM;
888 			}
889 
890 			if (md5sig->entries4)
891 				memcpy(keys, md5sig->keys4,
892 				       sizeof(*keys) * md5sig->entries4);
893 
894 			/* Free old key list, and reference new one */
895 			if (md5sig->keys4)
896 				kfree(md5sig->keys4);
897 			md5sig->keys4 = keys;
898 			md5sig->alloced4++;
899 		}
900 		md5sig->entries4++;
901 		md5sig->keys4[md5sig->entries4 - 1].addr   = addr;
902 		md5sig->keys4[md5sig->entries4 - 1].key    = newkey;
903 		md5sig->keys4[md5sig->entries4 - 1].keylen = newkeylen;
904 	}
905 	return 0;
906 }
907 
908 EXPORT_SYMBOL(tcp_v4_md5_do_add);
909 
910 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
911 			       u8 *newkey, u8 newkeylen)
912 {
913 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
914 				 newkey, newkeylen);
915 }
916 
917 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
918 {
919 	struct tcp_sock *tp = tcp_sk(sk);
920 	int i;
921 
922 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
923 		if (tp->md5sig_info->keys4[i].addr == addr) {
924 			/* Free the key */
925 			kfree(tp->md5sig_info->keys4[i].key);
926 			tp->md5sig_info->entries4--;
927 
928 			if (tp->md5sig_info->entries4 == 0) {
929 				kfree(tp->md5sig_info->keys4);
930 				tp->md5sig_info->keys4 = NULL;
931 				tp->md5sig_info->alloced4 = 0;
932 			} else if (tp->md5sig_info->entries4 != i) {
933 				/* Shift the remaining keys down to fill the hole */
934 				memcpy(&tp->md5sig_info->keys4[i],
935 				       &tp->md5sig_info->keys4[i+1],
936 				       (tp->md5sig_info->entries4 - i) *
937 					sizeof(struct tcp4_md5sig_key));
938 			}
939 			tcp_free_md5sig_pool();
940 			return 0;
941 		}
942 	}
943 	return -ENOENT;
944 }
945 
946 EXPORT_SYMBOL(tcp_v4_md5_do_del);
947 
948 static void tcp_v4_clear_md5_list(struct sock *sk)
949 {
950 	struct tcp_sock *tp = tcp_sk(sk);
951 
952 	/* Free each key, then the set of keys,
953 	 * the crypto element, and then decrement our
954 	 * hold on the last resort crypto.
955 	 */
956 	if (tp->md5sig_info->entries4) {
957 		int i;
958 		for (i = 0; i < tp->md5sig_info->entries4; i++)
959 			kfree(tp->md5sig_info->keys4[i].key);
960 		tp->md5sig_info->entries4 = 0;
961 		tcp_free_md5sig_pool();
962 	}
963 	if (tp->md5sig_info->keys4) {
964 		kfree(tp->md5sig_info->keys4);
965 		tp->md5sig_info->keys4 = NULL;
966 		tp->md5sig_info->alloced4  = 0;
967 	}
968 }
969 
970 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
971 				 int optlen)
972 {
973 	struct tcp_md5sig cmd;
974 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
975 	u8 *newkey;
976 
977 	if (optlen < sizeof(cmd))
978 		return -EINVAL;
979 
980 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
981 		return -EFAULT;
982 
983 	if (sin->sin_family != AF_INET)
984 		return -EINVAL;
985 
986 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
987 		if (!tcp_sk(sk)->md5sig_info)
988 			return -ENOENT;
989 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
990 	}
991 
992 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
993 		return -EINVAL;
994 
995 	if (!tcp_sk(sk)->md5sig_info) {
996 		struct tcp_sock *tp = tcp_sk(sk);
997 		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
998 
999 		if (!p)
1000 			return -EINVAL;
1001 
1002 		tp->md5sig_info = p;
1003 
1004 	}
1005 
1006 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
1007 	if (!newkey)
1008 		return -ENOMEM;
1009 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1010 				 newkey, cmd.tcpm_keylen);
1011 }
1012 
1013 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1014 				   __be32 saddr, __be32 daddr,
1015 				   struct tcphdr *th, int protocol,
1016 				   int tcplen)
1017 {
1018 	struct scatterlist sg[4];
1019 	__u16 data_len;
1020 	int block = 0;
1021 	__sum16 old_checksum;
1022 	struct tcp_md5sig_pool *hp;
1023 	struct tcp4_pseudohdr *bp;
1024 	struct hash_desc *desc;
1025 	int err;
1026 	unsigned int nbytes = 0;
1027 
1028 	/*
1029 	 * Okay, so RFC2385 is turned on for this connection,
1030 	 * so we need to generate the MD5 hash for the packet now.
1031 	 */
1032 
1033 	hp = tcp_get_md5sig_pool();
1034 	if (!hp)
1035 		goto clear_hash_noput;
1036 
1037 	bp = &hp->md5_blk.ip4;
1038 	desc = &hp->md5_desc;
1039 
1040 	/*
1041 	 * 1. the TCP pseudo-header (in the order: source IP address,
1042 	 * destination IP address, zero-padded protocol number, and
1043 	 * segment length)
1044 	 */
1045 	bp->saddr = saddr;
1046 	bp->daddr = daddr;
1047 	bp->pad = 0;
1048 	bp->protocol = protocol;
1049 	bp->len = htons(tcplen);
1050 	sg_set_buf(&sg[block++], bp, sizeof(*bp));
1051 	nbytes += sizeof(*bp);
1052 
1053 	/* 2. the TCP header, excluding options, and assuming a
1054 	 * checksum of zero.
1055 	 */
1056 	old_checksum = th->check;
1057 	th->check = 0;
1058 	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1059 	nbytes += sizeof(struct tcphdr);
1060 
1061 	/* 3. the TCP segment data (if any) */
1062 	data_len = tcplen - (th->doff << 2);
1063 	if (data_len > 0) {
1064 		unsigned char *data = (unsigned char *)th + (th->doff << 2);
1065 		sg_set_buf(&sg[block++], data, data_len);
1066 		nbytes += data_len;
1067 	}
1068 
1069 	/* 4. an independently-specified key or password, known to both
1070 	 * TCPs and presumably connection-specific
1071 	 */
1072 	sg_set_buf(&sg[block++], key->key, key->keylen);
1073 	nbytes += key->keylen;
1074 
1075 	/* Now store the Hash into the packet */
1076 	err = crypto_hash_init(desc);
1077 	if (err)
1078 		goto clear_hash;
1079 	err = crypto_hash_update(desc, sg, nbytes);
1080 	if (err)
1081 		goto clear_hash;
1082 	err = crypto_hash_final(desc, md5_hash);
1083 	if (err)
1084 		goto clear_hash;
1085 
1086 	/* Reset header, and free up the crypto */
1087 	tcp_put_md5sig_pool();
1088 	th->check = old_checksum;
1089 
1090 out:
1091 	return 0;
1092 clear_hash:
1093 	tcp_put_md5sig_pool();
1094 clear_hash_noput:
1095 	memset(md5_hash, 0, 16);
1096 	goto out;
1097 }
1098 
1099 int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1100 			 struct sock *sk,
1101 			 struct dst_entry *dst,
1102 			 struct request_sock *req,
1103 			 struct tcphdr *th, int protocol,
1104 			 int tcplen)
1105 {
1106 	__be32 saddr, daddr;
1107 
1108 	if (sk) {
1109 		saddr = inet_sk(sk)->saddr;
1110 		daddr = inet_sk(sk)->daddr;
1111 	} else {
1112 		struct rtable *rt = (struct rtable *)dst;
1113 		BUG_ON(!rt);
1114 		saddr = rt->rt_src;
1115 		daddr = rt->rt_dst;
1116 	}
1117 	return tcp_v4_do_calc_md5_hash(md5_hash, key,
1118 				       saddr, daddr,
1119 				       th, protocol, tcplen);
1120 }
1121 
1122 EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
1123 
1124 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1125 {
1126 	/*
1127 	 * This gets called for each TCP segment that arrives
1128 	 * so we want to be efficient.
1129 	 * We have 3 drop cases:
1130 	 * o No MD5 hash and one expected.
1131 	 * o MD5 hash and we're not expecting one.
1132 	 * o MD5 hash and it's wrong.
1133 	 */
1134 	__u8 *hash_location = NULL;
1135 	struct tcp_md5sig_key *hash_expected;
1136 	struct iphdr *iph = skb->nh.iph;
1137 	struct tcphdr *th = skb->h.th;
1138 	int length = (th->doff << 2) - sizeof(struct tcphdr);
1139 	int genhash;
1140 	unsigned char *ptr;
1141 	unsigned char newhash[16];
1142 
1143 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1144 
1145 	/*
1146 	 * If the TCP option length is less than the TCP_MD5SIG
1147 	 * option length, then we can shortcut
1148 	 */
1149 	if (length < TCPOLEN_MD5SIG) {
1150 		if (hash_expected)
1151 			return 1;
1152 		else
1153 			return 0;
1154 	}
1155 
1156 	/* Okay, we can't shortcut - we have to grub through the options */
1157 	ptr = (unsigned char *)(th + 1);
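	/* Walk the TCP options: EOL ends the list, NOP is a one-byte pad,
	 * everything else is kind/length/value.  Stop at the first MD5SIG.
	 */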
1158 	while (length > 0) {
1159 		int opcode = *ptr++;
1160 		int opsize;
1161 
1162 		switch (opcode) {
1163 		case TCPOPT_EOL:
1164 			goto done_opts;
1165 		case TCPOPT_NOP:
1166 			length--;
1167 			continue;
1168 		default:
1169 			opsize = *ptr++;
1170 			if (opsize < 2)
1171 				goto done_opts;
1172 			if (opsize > length)
1173 				goto done_opts;
1174 
1175 			if (opcode == TCPOPT_MD5SIG) {
1176 				hash_location = ptr;
1177 				goto done_opts;
1178 			}
1179 		}
1180 		ptr += opsize-2;
1181 		length -= opsize;
1182 	}
1183 done_opts:
1184 	/* We've parsed the options - do we have a hash? */
1185 	if (!hash_expected && !hash_location)
1186 		return 0;
1187 
1188 	if (hash_expected && !hash_location) {
1189 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
1190 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1191 			       NIPQUAD(iph->saddr), ntohs(th->source),
1192 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1193 		return 1;
1194 	}
1195 
1196 	if (!hash_expected && hash_location) {
1197 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
1198 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1199 			       NIPQUAD(iph->saddr), ntohs(th->source),
1200 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1201 		return 1;
1202 	}
1203 
1204 	/* Okay, so this is hash_expected and hash_location -
1205 	 * so we need to calculate the hash.
1206 	 */
1207 	genhash = tcp_v4_do_calc_md5_hash(newhash,
1208 					  hash_expected,
1209 					  iph->saddr, iph->daddr,
1210 					  th, sk->sk_protocol,
1211 					  skb->len);
1212 
1213 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1214 		if (net_ratelimit()) {
1215 			printk(KERN_INFO "MD5 Hash failed for "
1216 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1217 			       NIPQUAD(iph->saddr), ntohs(th->source),
1218 			       NIPQUAD(iph->daddr), ntohs(th->dest),
1219 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1220 		}
1221 		return 1;
1222 	}
1223 	return 0;
1224 }
1225 
1226 #endif
1227 
1228 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1229 	.family		=	PF_INET,
1230 	.obj_size	=	sizeof(struct tcp_request_sock),
1231 	.rtx_syn_ack	=	tcp_v4_send_synack,
1232 	.send_ack	=	tcp_v4_reqsk_send_ack,
1233 	.destructor	=	tcp_v4_reqsk_destructor,
1234 	.send_reset	=	tcp_v4_send_reset,
1235 };
1236 
1237 #ifdef CONFIG_TCP_MD5SIG
1238 static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1239 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1240 };
1241 #endif
1242 
1243 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1244 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1245 	.twsk_unique	= tcp_twsk_unique,
1246 	.twsk_destructor= tcp_twsk_destructor,
1247 };
1248 
1249 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1250 {
1251 	struct inet_request_sock *ireq;
1252 	struct tcp_options_received tmp_opt;
1253 	struct request_sock *req;
1254 	__be32 saddr = skb->nh.iph->saddr;
1255 	__be32 daddr = skb->nh.iph->daddr;
1256 	__u32 isn = TCP_SKB_CB(skb)->when;
1257 	struct dst_entry *dst = NULL;
1258 #ifdef CONFIG_SYN_COOKIES
1259 	int want_cookie = 0;
1260 #else
1261 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1262 #endif
1263 
1264 	/* Never answer SYNs sent to broadcast or multicast */
1265 	if (((struct rtable *)skb->dst)->rt_flags &
1266 	    (RTCF_BROADCAST | RTCF_MULTICAST))
1267 		goto drop;
1268 
1269 	/* TW buckets are converted to open requests without
1270 	 * limitations; they conserve resources and the peer is
1271 	 * evidently a real one.
1272 	 */
1273 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1274 #ifdef CONFIG_SYN_COOKIES
1275 		if (sysctl_tcp_syncookies) {
1276 			want_cookie = 1;
1277 		} else
1278 #endif
1279 		goto drop;
1280 	}
1281 
1282 	/* Accept backlog is full. If we have already queued enough
1283 	 * warm entries in the syn queue, drop the request. It is better than
1284 	 * clogging the syn queue with openreqs with exponentially increasing
1285 	 * timeout.
1286 	 */
1287 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1288 		goto drop;
1289 
1290 	req = reqsk_alloc(&tcp_request_sock_ops);
1291 	if (!req)
1292 		goto drop;
1293 
1294 #ifdef CONFIG_TCP_MD5SIG
1295 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1296 #endif
1297 
1298 	tcp_clear_options(&tmp_opt);
1299 	tmp_opt.mss_clamp = 536;
1300 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1301 
1302 	tcp_parse_options(skb, &tmp_opt, 0);
1303 
1304 	if (want_cookie) {
1305 		tcp_clear_options(&tmp_opt);
1306 		tmp_opt.saw_tstamp = 0;
1307 	}
1308 
1309 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1310 		/* Some OSes (unknown ones, but I see them on a web server which
1311 		 * contains information interesting only for Windows
1312 		 * users) do not send their timestamp in the SYN. It is an easy
1313 		 * case: we simply do not advertise TS support.
1314 		 */
1315 		tmp_opt.saw_tstamp = 0;
1316 		tmp_opt.tstamp_ok  = 0;
1317 	}
1318 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1319 
1320 	tcp_openreq_init(req, &tmp_opt, skb);
1321 
1322 	if (security_inet_conn_request(sk, skb, req))
1323 		goto drop_and_free;
1324 
1325 	ireq = inet_rsk(req);
1326 	ireq->loc_addr = daddr;
1327 	ireq->rmt_addr = saddr;
1328 	ireq->opt = tcp_v4_save_options(sk, skb);
1329 	if (!want_cookie)
1330 		TCP_ECN_create_request(req, skb->h.th);
1331 
1332 	if (want_cookie) {
1333 #ifdef CONFIG_SYN_COOKIES
1334 		syn_flood_warning(skb);
1335 #endif
1336 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1337 	} else if (!isn) {
1338 		struct inet_peer *peer = NULL;
1339 
1340 		/* VJ's idea. We save last timestamp seen
1341 		 * from the destination in peer table, when entering
1342 		 * state TIME-WAIT, and check against it before
1343 		 * accepting new connection request.
1344 		 *
1345 		 * If "isn" is not zero, this request hit a live
1346 		 * timewait bucket, so all the necessary checks
1347 		 * were made in the function processing the timewait state.
1348 		 */
1349 		if (tmp_opt.saw_tstamp &&
1350 		    tcp_death_row.sysctl_tw_recycle &&
1351 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1352 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1353 		    peer->v4daddr == saddr) {
1354 			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1355 			    (s32)(peer->tcp_ts - req->ts_recent) >
1356 							TCP_PAWS_WINDOW) {
1357 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1358 				dst_release(dst);
1359 				goto drop_and_free;
1360 			}
1361 		}
1362 		/* Kill the following clause, if you dislike this way. */
1363 		else if (!sysctl_tcp_syncookies &&
1364 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1365 			  (sysctl_max_syn_backlog >> 2)) &&
1366 			 (!peer || !peer->tcp_ts_stamp) &&
1367 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1368 			/* Without syncookies, the last quarter of the
1369 			 * backlog is filled with destinations
1370 			 * proven to be alive.
1371 			 * It means that we continue to communicate
1372 			 * with destinations already remembered
1373 			 * at the moment of the synflood.
1374 			 */
1375 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1376 				       "request from %u.%u.%u.%u/%u\n",
1377 				       NIPQUAD(saddr),
1378 				       ntohs(skb->h.th->source));
1379 			dst_release(dst);
1380 			goto drop_and_free;
1381 		}
1382 
1383 		isn = tcp_v4_init_sequence(skb);
1384 	}
1385 	tcp_rsk(req)->snt_isn = isn;
1386 
1387 	if (tcp_v4_send_synack(sk, req, dst))
1388 		goto drop_and_free;
1389 
1390 	if (want_cookie) {
1391 		reqsk_free(req);
1392 	} else {
1393 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1394 	}
1395 	return 0;
1396 
1397 drop_and_free:
1398 	reqsk_free(req);
1399 drop:
1400 	return 0;
1401 }
1402 
1403 
1404 /*
1405  * The three way handshake has completed - we got a valid ACK -
1406  * now create the new socket.
1407  */
1408 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1409 				  struct request_sock *req,
1410 				  struct dst_entry *dst)
1411 {
1412 	struct inet_request_sock *ireq;
1413 	struct inet_sock *newinet;
1414 	struct tcp_sock *newtp;
1415 	struct sock *newsk;
1416 #ifdef CONFIG_TCP_MD5SIG
1417 	struct tcp_md5sig_key *key;
1418 #endif
1419 
1420 	if (sk_acceptq_is_full(sk))
1421 		goto exit_overflow;
1422 
1423 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1424 		goto exit;
1425 
1426 	newsk = tcp_create_openreq_child(sk, req, skb);
1427 	if (!newsk)
1428 		goto exit;
1429 
1430 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1431 	sk_setup_caps(newsk, dst);
1432 
1433 	newtp		      = tcp_sk(newsk);
1434 	newinet		      = inet_sk(newsk);
1435 	ireq		      = inet_rsk(req);
1436 	newinet->daddr	      = ireq->rmt_addr;
1437 	newinet->rcv_saddr    = ireq->loc_addr;
1438 	newinet->saddr	      = ireq->loc_addr;
1439 	newinet->opt	      = ireq->opt;
1440 	ireq->opt	      = NULL;
1441 	newinet->mc_index     = inet_iif(skb);
1442 	newinet->mc_ttl	      = skb->nh.iph->ttl;
1443 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1444 	if (newinet->opt)
1445 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1446 	newinet->id = newtp->write_seq ^ jiffies;
1447 
1448 	tcp_mtup_init(newsk);
1449 	tcp_sync_mss(newsk, dst_mtu(dst));
1450 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1451 	tcp_initialize_rcv_mss(newsk);
1452 
1453 #ifdef CONFIG_TCP_MD5SIG
1454 	/* Copy over the MD5 key from the original socket */
1455 	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1456 		/*
1457 		 * We're using one, so create a matching key
1458 		 * on the newsk structure. If we fail to get
1459 		 * memory, then we end up not copying the key
1460 		 * across. Shucks.
1461 		 */
1462 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1463 		if (newkey != NULL)
1464 			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1465 					  newkey, key->keylen);
1466 	}
1467 #endif
1468 
1469 	__inet_hash(&tcp_hashinfo, newsk, 0);
1470 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
1471 
1472 	return newsk;
1473 
1474 exit_overflow:
1475 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1476 exit:
1477 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1478 	dst_release(dst);
1479 	return NULL;
1480 }
1481 
1482 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483 {
1484 	struct tcphdr *th = skb->h.th;
1485 	struct iphdr *iph = skb->nh.iph;
1486 	struct sock *nsk;
1487 	struct request_sock **prev;
1488 	/* Find possible connection requests. */
1489 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1490 						       iph->saddr, iph->daddr);
1491 	if (req)
1492 		return tcp_check_req(sk, skb, req, prev);
1493 
1494 	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
1495 				      th->source, skb->nh.iph->daddr,
1496 				      th->dest, inet_iif(skb));
1497 
1498 	if (nsk) {
1499 		if (nsk->sk_state != TCP_TIME_WAIT) {
1500 			bh_lock_sock(nsk);
1501 			return nsk;
1502 		}
1503 		inet_twsk_put(inet_twsk(nsk));
1504 		return NULL;
1505 	}
1506 
1507 #ifdef CONFIG_SYN_COOKIES
1508 	if (!th->rst && !th->syn && th->ack)
1509 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1510 #endif
1511 	return sk;
1512 }
1513 
1514 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1515 {
1516 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1517 		if (!tcp_v4_check(skb->len, skb->nh.iph->saddr,
1518 				  skb->nh.iph->daddr, skb->csum)) {
1519 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1520 			return 0;
1521 		}
1522 	}
1523 
1524 	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
1525 				       skb->len, IPPROTO_TCP, 0);
1526 
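	/* Packets of at most 76 bytes are verified right away; for longer
	 * ones we keep the pseudo-header sum in skb->csum and defer full
	 * verification until the data is copied or otherwise consumed.
	 */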
1527 	if (skb->len <= 76) {
1528 		return __skb_checksum_complete(skb);
1529 	}
1530 	return 0;
1531 }
1532 
1533 
1534 /* The socket must have its spinlock held when we get
1535  * here.
1536  *
1537  * We have a potential double-lock case here, so even when
1538  * doing backlog processing we use the BH locking scheme.
1539  * This is because we cannot sleep with the original spinlock
1540  * held.
1541  */
1542 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1543 {
1544 	struct sock *rsk;
1545 #ifdef CONFIG_TCP_MD5SIG
1546 	/*
1547 	 * We really want to reject the packet as early as possible
1548 	 * if:
1549 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1550 	 *  o There is an MD5 option and we're not expecting one
1551 	 */
1552 	if (tcp_v4_inbound_md5_hash(sk, skb))
1553 		goto discard;
1554 #endif
1555 
1556 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1557 		TCP_CHECK_TIMER(sk);
1558 		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
1559 			rsk = sk;
1560 			goto reset;
1561 		}
1562 		TCP_CHECK_TIMER(sk);
1563 		return 0;
1564 	}
1565 
1566 	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1567 		goto csum_err;
1568 
1569 	if (sk->sk_state == TCP_LISTEN) {
1570 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1571 		if (!nsk)
1572 			goto discard;
1573 
1574 		if (nsk != sk) {
1575 			if (tcp_child_process(sk, nsk, skb)) {
1576 				rsk = nsk;
1577 				goto reset;
1578 			}
1579 			return 0;
1580 		}
1581 	}
1582 
1583 	TCP_CHECK_TIMER(sk);
1584 	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
1585 		rsk = sk;
1586 		goto reset;
1587 	}
1588 	TCP_CHECK_TIMER(sk);
1589 	return 0;
1590 
1591 reset:
1592 	tcp_v4_send_reset(rsk, skb);
1593 discard:
1594 	kfree_skb(skb);
1595 	/* Be careful here. If this function gets more complicated and
1596 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1597 	 * might be destroyed here. This current version compiles correctly,
1598 	 * but you have been warned.
1599 	 */
1600 	return 0;
1601 
1602 csum_err:
1603 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1604 	goto discard;
1605 }
1606 
1607 /*
1608  *	From tcp_input.c
1609  */
1610 
1611 int tcp_v4_rcv(struct sk_buff *skb)
1612 {
1613 	struct tcphdr *th;
1614 	struct sock *sk;
1615 	int ret;
1616 
1617 	if (skb->pkt_type != PACKET_HOST)
1618 		goto discard_it;
1619 
1620 	/* Count it even if it's bad */
1621 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1622 
1623 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1624 		goto discard_it;
1625 
1626 	th = skb->h.th;
1627 
1628 	if (th->doff < sizeof(struct tcphdr) / 4)
1629 		goto bad_packet;
1630 	if (!pskb_may_pull(skb, th->doff * 4))
1631 		goto discard_it;
1632 
1633 	/* An explanation is required here, I think.
1634 	 * Packet length and doff are validated by header prediction,
1635 	 * provided the case of th->doff==0 is eliminated.
1636 	 * So, we defer the checks. */
1637 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1638 	     tcp_v4_checksum_init(skb)))
1639 		goto bad_packet;
1640 
1641 	th = skb->h.th;
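	/* SYN and FIN each occupy one unit of sequence space, so end_seq
	 * accounts for those flags as well as the payload length.
	 */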
1642 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1643 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1644 				    skb->len - th->doff * 4);
1645 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1646 	TCP_SKB_CB(skb)->when	 = 0;
1647 	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
1648 	TCP_SKB_CB(skb)->sacked	 = 0;
1649 
1650 	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
1651 			   skb->nh.iph->daddr, th->dest,
1652 			   inet_iif(skb));
1653 
1654 	if (!sk)
1655 		goto no_tcp_socket;
1656 
1657 process:
1658 	if (sk->sk_state == TCP_TIME_WAIT)
1659 		goto do_time_wait;
1660 
1661 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1662 		goto discard_and_relse;
1663 	nf_reset(skb);
1664 
1665 	if (sk_filter(sk, skb))
1666 		goto discard_and_relse;
1667 
1668 	skb->dev = NULL;
1669 
1670 	bh_lock_sock_nested(sk);
1671 	ret = 0;
1672 	if (!sock_owned_by_user(sk)) {
1673 #ifdef CONFIG_NET_DMA
1674 		struct tcp_sock *tp = tcp_sk(sk);
1675 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1676 			tp->ucopy.dma_chan = get_softnet_dma();
1677 		if (tp->ucopy.dma_chan)
1678 			ret = tcp_v4_do_rcv(sk, skb);
1679 		else
1680 #endif
1681 		{
1682 			if (!tcp_prequeue(sk, skb))
1683 				ret = tcp_v4_do_rcv(sk, skb);
1684 		}
1685 	} else
1686 		sk_add_backlog(sk, skb);
1687 	bh_unlock_sock(sk);
1688 
1689 	sock_put(sk);
1690 
1691 	return ret;
1692 
1693 no_tcp_socket:
1694 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1695 		goto discard_it;
1696 
1697 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1698 bad_packet:
1699 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1700 	} else {
1701 		tcp_v4_send_reset(NULL, skb);
1702 	}
1703 
1704 discard_it:
1705 	/* Discard frame. */
1706 	kfree_skb(skb);
1707 	return 0;
1708 
1709 discard_and_relse:
1710 	sock_put(sk);
1711 	goto discard_it;
1712 
1713 do_time_wait:
1714 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1715 		inet_twsk_put(inet_twsk(sk));
1716 		goto discard_it;
1717 	}
1718 
1719 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1720 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1721 		inet_twsk_put(inet_twsk(sk));
1722 		goto discard_it;
1723 	}
1724 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1725 	case TCP_TW_SYN: {
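		/* A new SYN hit a TIME-WAIT socket.  If a matching listener
		 * exists, discard the timewait bucket and process the SYN
		 * against that listener instead.
		 */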
1726 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1727 							skb->nh.iph->daddr,
1728 							th->dest,
1729 							inet_iif(skb));
1730 		if (sk2) {
1731 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1732 			inet_twsk_put(inet_twsk(sk));
1733 			sk = sk2;
1734 			goto process;
1735 		}
1736 		/* Fall through to ACK */
1737 	}
1738 	case TCP_TW_ACK:
1739 		tcp_v4_timewait_ack(sk, skb);
1740 		break;
1741 	case TCP_TW_RST:
1742 		goto no_tcp_socket;
1743 	case TCP_TW_SUCCESS:;
1744 	}
1745 	goto discard_it;
1746 }
1747 
1748 /* VJ's idea. Save last timestamp seen from this destination
1749  * and hold it at least for normal timewait interval to use for duplicate
1750  * segment detection in subsequent connections, before they enter synchronized
1751  * state.
1752  */
1753 
1754 int tcp_v4_remember_stamp(struct sock *sk)
1755 {
1756 	struct inet_sock *inet = inet_sk(sk);
1757 	struct tcp_sock *tp = tcp_sk(sk);
1758 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1759 	struct inet_peer *peer = NULL;
1760 	int release_it = 0;
1761 
1762 	if (!rt || rt->rt_dst != inet->daddr) {
1763 		peer = inet_getpeer(inet->daddr, 1);
1764 		release_it = 1;
1765 	} else {
1766 		if (!rt->peer)
1767 			rt_bind_peer(rt, 1);
1768 		peer = rt->peer;
1769 	}
1770 
1771 	if (peer) {
1772 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1773 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1774 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1775 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1776 			peer->tcp_ts = tp->rx_opt.ts_recent;
1777 		}
1778 		if (release_it)
1779 			inet_putpeer(peer);
1780 		return 1;
1781 	}
1782 
1783 	return 0;
1784 }
1785 
1786 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1787 {
1788 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1789 
1790 	if (peer) {
1791 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1792 
1793 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1794 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
1795 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1796 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1797 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1798 		}
1799 		inet_putpeer(peer);
1800 		return 1;
1801 	}
1802 
1803 	return 0;
1804 }
1805 
1806 struct inet_connection_sock_af_ops ipv4_specific = {
1807 	.queue_xmit	   = ip_queue_xmit,
1808 	.send_check	   = tcp_v4_send_check,
1809 	.rebuild_header	   = inet_sk_rebuild_header,
1810 	.conn_request	   = tcp_v4_conn_request,
1811 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1812 	.remember_stamp	   = tcp_v4_remember_stamp,
1813 	.net_header_len	   = sizeof(struct iphdr),
1814 	.setsockopt	   = ip_setsockopt,
1815 	.getsockopt	   = ip_getsockopt,
1816 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1817 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1818 #ifdef CONFIG_COMPAT
1819 	.compat_setsockopt = compat_ip_setsockopt,
1820 	.compat_getsockopt = compat_ip_getsockopt,
1821 #endif
1822 };
1823 
1824 #ifdef CONFIG_TCP_MD5SIG
1825 static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1826 	.md5_lookup		= tcp_v4_md5_lookup,
1827 	.calc_md5_hash		= tcp_v4_calc_md5_hash,
1828 	.md5_add		= tcp_v4_md5_add_func,
1829 	.md5_parse		= tcp_v4_parse_md5_keys,
1830 };
1831 #endif
1832 
1833 /* NOTE: A lot of things are set to zero explicitly by the call to
1834  *       sk_alloc(), so they need not be done here.
1835  */
1836 static int tcp_v4_init_sock(struct sock *sk)
1837 {
1838 	struct inet_connection_sock *icsk = inet_csk(sk);
1839 	struct tcp_sock *tp = tcp_sk(sk);
1840 
1841 	skb_queue_head_init(&tp->out_of_order_queue);
1842 	tcp_init_xmit_timers(sk);
1843 	tcp_prequeue_init(tp);
1844 
1845 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1846 	tp->mdev = TCP_TIMEOUT_INIT;
1847 
1848 	/* So many TCP implementations out there (incorrectly) count the
1849 	 * initial SYN frame in their delayed-ACK and congestion control
1850 	 * algorithms that we must have the following bandaid to talk
1851 	 * efficiently to them.  -DaveM
1852 	 */
1853 	tp->snd_cwnd = 2;
1854 
1855 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1856 	 * initialization of these values.
1857 	 */
1858 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
1859 	tp->snd_cwnd_clamp = ~0;
1860 	tp->mss_cache = 536;
1861 
1862 	tp->reordering = sysctl_tcp_reordering;
1863 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1864 
1865 	sk->sk_state = TCP_CLOSE;
1866 
1867 	sk->sk_write_space = sk_stream_write_space;
1868 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1869 
1870 	icsk->icsk_af_ops = &ipv4_specific;
1871 	icsk->icsk_sync_mss = tcp_sync_mss;
1872 #ifdef CONFIG_TCP_MD5SIG
1873 	tp->af_specific = &tcp_sock_ipv4_specific;
1874 #endif
1875 
1876 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1877 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1878 
1879 	atomic_inc(&tcp_sockets_allocated);
1880 
1881 	return 0;
1882 }
1883 
1884 int tcp_v4_destroy_sock(struct sock *sk)
1885 {
1886 	struct tcp_sock *tp = tcp_sk(sk);
1887 
1888 	tcp_clear_xmit_timers(sk);
1889 
1890 	tcp_cleanup_congestion_control(sk);
1891 
1892 	/* Clean up the write buffer. */
1893 	sk_stream_writequeue_purge(sk);
1894 
1895 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1896 	__skb_queue_purge(&tp->out_of_order_queue);
1897 
1898 #ifdef CONFIG_TCP_MD5SIG
1899 	/* Clean up the MD5 key list, if any */
1900 	if (tp->md5sig_info) {
1901 		tcp_v4_clear_md5_list(sk);
1902 		kfree(tp->md5sig_info);
1903 		tp->md5sig_info = NULL;
1904 	}
1905 #endif
1906 
1907 #ifdef CONFIG_NET_DMA
1908 	/* Cleans up our sk_async_wait_queue */
1909 	__skb_queue_purge(&sk->sk_async_wait_queue);
1910 #endif
1911 
1912 	/* Clean up the prequeue; it really should be empty by now. */
1913 	__skb_queue_purge(&tp->ucopy.prequeue);
1914 
1915 	/* Clean up a referenced TCP bind bucket. */
1916 	if (inet_csk(sk)->icsk_bind_hash)
1917 		inet_put_port(&tcp_hashinfo, sk);
1918 
1919 	/*
1920 	 * If a cached sendmsg() page exists, toss it.
1921 	 */
1922 	if (sk->sk_sndmsg_page) {
1923 		__free_page(sk->sk_sndmsg_page);
1924 		sk->sk_sndmsg_page = NULL;
1925 	}
1926 
1927 	atomic_dec(&tcp_sockets_allocated);
1928 
1929 	return 0;
1930 }
1931 
1932 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1933 
1934 #ifdef CONFIG_PROC_FS
1935 /* Proc filesystem TCP sock list dumping. */
1936 
1937 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1938 {
1939 	return hlist_empty(head) ? NULL :
1940 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1941 }
1942 
1943 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1944 {
1945 	return tw->tw_node.next ?
1946 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1947 }
1948 
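/*
 * Advance the /proc iterator over the listening hash.  For every
 * listening socket whose SYN queue is non-empty we also walk its
 * queued request_socks (TCP_SEQ_STATE_OPENREQ) under syn_wait_lock
 * before moving on to the next socket or bucket.
 */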
1949 static void *listening_get_next(struct seq_file *seq, void *cur)
1950 {
1951 	struct inet_connection_sock *icsk;
1952 	struct hlist_node *node;
1953 	struct sock *sk = cur;
1954 	struct tcp_iter_state* st = seq->private;
1955 
1956 	if (!sk) {
1957 		st->bucket = 0;
1958 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1959 		goto get_sk;
1960 	}
1961 
1962 	++st->num;
1963 
1964 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1965 		struct request_sock *req = cur;
1966 
1967 		icsk = inet_csk(st->syn_wait_sk);
1968 		req = req->dl_next;
1969 		while (1) {
1970 			while (req) {
1971 				if (req->rsk_ops->family == st->family) {
1972 					cur = req;
1973 					goto out;
1974 				}
1975 				req = req->dl_next;
1976 			}
1977 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1978 				break;
1979 get_req:
1980 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1981 		}
1982 		sk	  = sk_next(st->syn_wait_sk);
1983 		st->state = TCP_SEQ_STATE_LISTENING;
1984 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1985 	} else {
1986 		icsk = inet_csk(sk);
1987 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1988 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
1989 			goto start_req;
1990 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1991 		sk = sk_next(sk);
1992 	}
1993 get_sk:
1994 	sk_for_each_from(sk, node) {
1995 		if (sk->sk_family == st->family) {
1996 			cur = sk;
1997 			goto out;
1998 		}
1999 		icsk = inet_csk(sk);
2000 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2001 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2002 start_req:
2003 			st->uid		= sock_i_uid(sk);
2004 			st->syn_wait_sk = sk;
2005 			st->state	= TCP_SEQ_STATE_OPENREQ;
2006 			st->sbucket	= 0;
2007 			goto get_req;
2008 		}
2009 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2010 	}
2011 	if (++st->bucket < INET_LHTABLE_SIZE) {
2012 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2013 		goto get_sk;
2014 	}
2015 	cur = NULL;
2016 out:
2017 	return cur;
2018 }
2019 
2020 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2021 {
2022 	void *rc = listening_get_next(seq, NULL);
2023 
2024 	while (rc && *pos) {
2025 		rc = listening_get_next(seq, rc);
2026 		--*pos;
2027 	}
2028 	return rc;
2029 }
2030 
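/*
 * Find the first socket of the requested family in the established
 * hash: scan each ehash bucket's normal chain first, then its
 * TIME_WAIT chain.  The bucket lock is left held when an entry is
 * returned; tcp_seq_stop() drops it.
 */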
2031 static void *established_get_first(struct seq_file *seq)
2032 {
2033 	struct tcp_iter_state* st = seq->private;
2034 	void *rc = NULL;
2035 
2036 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2037 		struct sock *sk;
2038 		struct hlist_node *node;
2039 		struct inet_timewait_sock *tw;
2040 
2041 		/* We can reschedule _before_ having picked the target: */
2042 		cond_resched_softirq();
2043 
2044 		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2045 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2046 			if (sk->sk_family != st->family) {
2047 				continue;
2048 			}
2049 			rc = sk;
2050 			goto out;
2051 		}
2052 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2053 		inet_twsk_for_each(tw, node,
2054 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2055 			if (tw->tw_family != st->family) {
2056 				continue;
2057 			}
2058 			rc = tw;
2059 			goto out;
2060 		}
2061 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2062 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2063 	}
2064 out:
2065 	return rc;
2066 }
2067 
2068 static void *established_get_next(struct seq_file *seq, void *cur)
2069 {
2070 	struct sock *sk = cur;
2071 	struct inet_timewait_sock *tw;
2072 	struct hlist_node *node;
2073 	struct tcp_iter_state* st = seq->private;
2074 
2075 	++st->num;
2076 
2077 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2078 		tw = cur;
2079 		tw = tw_next(tw);
2080 get_tw:
2081 		while (tw && tw->tw_family != st->family) {
2082 			tw = tw_next(tw);
2083 		}
2084 		if (tw) {
2085 			cur = tw;
2086 			goto out;
2087 		}
2088 		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2089 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2090 
2091 		/* We can reschedule between buckets: */
2092 		cond_resched_softirq();
2093 
2094 		if (++st->bucket < tcp_hashinfo.ehash_size) {
2095 			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
2096 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2097 		} else {
2098 			cur = NULL;
2099 			goto out;
2100 		}
2101 	} else
2102 		sk = sk_next(sk);
2103 
2104 	sk_for_each_from(sk, node) {
2105 		if (sk->sk_family == st->family)
2106 			goto found;
2107 	}
2108 
2109 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2110 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2111 	goto get_tw;
2112 found:
2113 	cur = sk;
2114 out:
2115 	return cur;
2116 }
2117 
2118 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2119 {
2120 	void *rc = established_get_first(seq);
2121 
2122 	while (rc && pos) {
2123 		rc = established_get_next(seq, rc);
2124 		--pos;
2125 	}
2126 	return rc;
2127 }
2128 
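/*
 * Position the iterator at entry number 'pos': start in the listening
 * hash and, once that is exhausted, continue transparently into the
 * established/TIME_WAIT hash with bottom halves disabled.
 */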
2129 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2130 {
2131 	void *rc;
2132 	struct tcp_iter_state* st = seq->private;
2133 
2134 	inet_listen_lock(&tcp_hashinfo);
2135 	st->state = TCP_SEQ_STATE_LISTENING;
2136 	rc	  = listening_get_idx(seq, &pos);
2137 
2138 	if (!rc) {
2139 		inet_listen_unlock(&tcp_hashinfo);
2140 		local_bh_disable();
2141 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2142 		rc	  = established_get_idx(seq, pos);
2143 	}
2144 
2145 	return rc;
2146 }
2147 
2148 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2149 {
2150 	struct tcp_iter_state* st = seq->private;
2151 	st->state = TCP_SEQ_STATE_LISTENING;
2152 	st->num = 0;
2153 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2154 }
2155 
2156 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2157 {
2158 	void *rc = NULL;
2159 	struct tcp_iter_state* st;
2160 
2161 	if (v == SEQ_START_TOKEN) {
2162 		rc = tcp_get_idx(seq, 0);
2163 		goto out;
2164 	}
2165 	st = seq->private;
2166 
2167 	switch (st->state) {
2168 	case TCP_SEQ_STATE_OPENREQ:
2169 	case TCP_SEQ_STATE_LISTENING:
2170 		rc = listening_get_next(seq, v);
2171 		if (!rc) {
2172 			inet_listen_unlock(&tcp_hashinfo);
2173 			local_bh_disable();
2174 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2175 			rc	  = established_get_first(seq);
2176 		}
2177 		break;
2178 	case TCP_SEQ_STATE_ESTABLISHED:
2179 	case TCP_SEQ_STATE_TIME_WAIT:
2180 		rc = established_get_next(seq, v);
2181 		break;
2182 	}
2183 out:
2184 	++*pos;
2185 	return rc;
2186 }
2187 
2188 static void tcp_seq_stop(struct seq_file *seq, void *v)
2189 {
2190 	struct tcp_iter_state* st = seq->private;
2191 
2192 	switch (st->state) {
2193 	case TCP_SEQ_STATE_OPENREQ:
2194 		if (v) {
2195 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2196 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2197 		}
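		/* fall through: a request is always reached via its
		 * listening socket, so the listen lock below must be
		 * dropped as well.
		 */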
2198 	case TCP_SEQ_STATE_LISTENING:
2199 		if (v != SEQ_START_TOKEN)
2200 			inet_listen_unlock(&tcp_hashinfo);
2201 		break;
2202 	case TCP_SEQ_STATE_TIME_WAIT:
2203 	case TCP_SEQ_STATE_ESTABLISHED:
2204 		if (v)
2205 			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
2206 		local_bh_enable();
2207 		break;
2208 	}
2209 }
2210 
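/*
 * ->open() for the per-family /proc file: allocate the iterator state,
 * wire up the seq_operations from the tcp_seq_afinfo attached to the
 * proc entry and stash the state in seq->private.
 */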
2211 static int tcp_seq_open(struct inode *inode, struct file *file)
2212 {
2213 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2214 	struct seq_file *seq;
2215 	struct tcp_iter_state *s;
2216 	int rc;
2217 
2218 	if (unlikely(afinfo == NULL))
2219 		return -EINVAL;
2220 
2221 	s = kzalloc(sizeof(*s), GFP_KERNEL);
2222 	if (!s)
2223 		return -ENOMEM;
2224 	s->family		= afinfo->family;
2225 	s->seq_ops.start	= tcp_seq_start;
2226 	s->seq_ops.next		= tcp_seq_next;
2227 	s->seq_ops.show		= afinfo->seq_show;
2228 	s->seq_ops.stop		= tcp_seq_stop;
2229 
2230 	rc = seq_open(file, &s->seq_ops);
2231 	if (rc)
2232 		goto out_kfree;
2233 	seq	     = file->private_data;
2234 	seq->private = s;
2235 out:
2236 	return rc;
2237 out_kfree:
2238 	kfree(s);
2239 	goto out;
2240 }
2241 
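/*
 * Register a per-family "tcp"-style seq file under /proc/net.  A
 * caller fills in a struct tcp_seq_afinfo and hands it in, roughly
 * like this (sketch only; the "my_*" names are hypothetical):
 *
 *	static struct file_operations my_seq_fops;
 *	static struct tcp_seq_afinfo my_seq_afinfo = {
 *		.owner		= THIS_MODULE,
 *		.name		= "tcp_my",
 *		.family		= AF_INET,
 *		.seq_show	= my_seq_show,
 *		.seq_fops	= &my_seq_fops,
 *	};
 *
 *	tcp_proc_register(&my_seq_afinfo);
 *
 * tcp4_seq_afinfo below is the in-tree IPv4 instance.
 */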
2242 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2243 {
2244 	int rc = 0;
2245 	struct proc_dir_entry *p;
2246 
2247 	if (!afinfo)
2248 		return -EINVAL;
2249 	afinfo->seq_fops->owner		= afinfo->owner;
2250 	afinfo->seq_fops->open		= tcp_seq_open;
2251 	afinfo->seq_fops->read		= seq_read;
2252 	afinfo->seq_fops->llseek	= seq_lseek;
2253 	afinfo->seq_fops->release	= seq_release_private;
2254 
2255 	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2256 	if (p)
2257 		p->data = afinfo;
2258 	else
2259 		rc = -ENOMEM;
2260 	return rc;
2261 }
2262 
2263 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2264 {
2265 	if (!afinfo)
2266 		return;
2267 	proc_net_remove(afinfo->name);
2268 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2269 }
2270 
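/*
 * The next three helpers each format one /proc/net/tcp line:
 * get_openreq4() for embryonic SYN_RECV requests, get_tcp4_sock() for
 * full sockets and get_timewait4_sock() for TIME_WAIT sockets.
 */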
2271 static void get_openreq4(struct sock *sk, struct request_sock *req,
2272 			 char *tmpbuf, int i, int uid)
2273 {
2274 	const struct inet_request_sock *ireq = inet_rsk(req);
2275 	int ttd = req->expires - jiffies;
2276 
2277 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2278 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2279 		i,
2280 		ireq->loc_addr,
2281 		ntohs(inet_sk(sk)->sport),
2282 		ireq->rmt_addr,
2283 		ntohs(ireq->rmt_port),
2284 		TCP_SYN_RECV,
2285 		0, 0, /* could print option size, but that is af dependent. */
2286 		1,    /* timers active (only the expire timer) */
2287 		jiffies_to_clock_t(ttd),
2288 		req->retrans,
2289 		uid,
2290 		0,  /* non standard timer */
2291 		0, /* open_requests have no inode */
2292 		atomic_read(&sk->sk_refcnt),
2293 		req);
2294 }
2295 
2296 static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2297 {
2298 	int timer_active;
2299 	unsigned long timer_expires;
2300 	struct tcp_sock *tp = tcp_sk(sp);
2301 	const struct inet_connection_sock *icsk = inet_csk(sp);
2302 	struct inet_sock *inet = inet_sk(sp);
2303 	__be32 dest = inet->daddr;
2304 	__be32 src = inet->rcv_saddr;
2305 	__u16 destp = ntohs(inet->dport);
2306 	__u16 srcp = ntohs(inet->sport);
2307 
2308 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2309 		timer_active	= 1;
2310 		timer_expires	= icsk->icsk_timeout;
2311 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2312 		timer_active	= 4;
2313 		timer_expires	= icsk->icsk_timeout;
2314 	} else if (timer_pending(&sp->sk_timer)) {
2315 		timer_active	= 2;
2316 		timer_expires	= sp->sk_timer.expires;
2317 	} else {
2318 		timer_active	= 0;
2319 		timer_expires = jiffies;
2320 	}
2321 
2322 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2323 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
2324 		i, src, srcp, dest, destp, sp->sk_state,
2325 		tp->write_seq - tp->snd_una,
2326 		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
2327 					     (tp->rcv_nxt - tp->copied_seq),
2328 		timer_active,
2329 		jiffies_to_clock_t(timer_expires - jiffies),
2330 		icsk->icsk_retransmits,
2331 		sock_i_uid(sp),
2332 		icsk->icsk_probes_out,
2333 		sock_i_ino(sp),
2334 		atomic_read(&sp->sk_refcnt), sp,
2335 		icsk->icsk_rto,
2336 		icsk->icsk_ack.ato,
2337 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2338 		tp->snd_cwnd,
2339 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2340 }
2341 
2342 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2343 			       char *tmpbuf, int i)
2344 {
2345 	__be32 dest, src;
2346 	__u16 destp, srcp;
2347 	int ttd = tw->tw_ttd - jiffies;
2348 
2349 	if (ttd < 0)
2350 		ttd = 0;
2351 
2352 	dest  = tw->tw_daddr;
2353 	src   = tw->tw_rcv_saddr;
2354 	destp = ntohs(tw->tw_dport);
2355 	srcp  = ntohs(tw->tw_sport);
2356 
2357 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2358 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2359 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2360 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2361 		atomic_read(&tw->tw_refcnt), tw);
2362 }
2363 
2364 #define TMPSZ 150
2365 
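/*
 * Each entry is one TMPSZ-wide line.  An illustrative (made up, not
 * captured) established-socket line as produced by get_tcp4_sock():
 *
 *    2: 0100007F:0CEA 0100007F:0016 01 00000000:00000000 00:00000000 00000000  1000 0 23456 1 c0d2e000 200 40 0 2 -1
 *
 * i.e. slot, local and remote address:port, state, tx:rx queue sizes,
 * timer info, retransmits, uid, probes out, inode, refcount, socket
 * pointer and the RTO/ATO/ack-mode/cwnd/ssthresh debug fields.
 */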
2366 static int tcp4_seq_show(struct seq_file *seq, void *v)
2367 {
2368 	struct tcp_iter_state* st;
2369 	char tmpbuf[TMPSZ + 1];
2370 
2371 	if (v == SEQ_START_TOKEN) {
2372 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2373 			   "  sl  local_address rem_address   st tx_queue "
2374 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2375 			   "inode");
2376 		goto out;
2377 	}
2378 	st = seq->private;
2379 
2380 	switch (st->state) {
2381 	case TCP_SEQ_STATE_LISTENING:
2382 	case TCP_SEQ_STATE_ESTABLISHED:
2383 		get_tcp4_sock(v, tmpbuf, st->num);
2384 		break;
2385 	case TCP_SEQ_STATE_OPENREQ:
2386 		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2387 		break;
2388 	case TCP_SEQ_STATE_TIME_WAIT:
2389 		get_timewait4_sock(v, tmpbuf, st->num);
2390 		break;
2391 	}
2392 	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2393 out:
2394 	return 0;
2395 }
2396 
2397 static struct file_operations tcp4_seq_fops;
2398 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2399 	.owner		= THIS_MODULE,
2400 	.name		= "tcp",
2401 	.family		= AF_INET,
2402 	.seq_show	= tcp4_seq_show,
2403 	.seq_fops	= &tcp4_seq_fops,
2404 };
2405 
2406 int __init tcp4_proc_init(void)
2407 {
2408 	return tcp_proc_register(&tcp4_seq_afinfo);
2409 }
2410 
2411 void tcp4_proc_exit(void)
2412 {
2413 	tcp_proc_unregister(&tcp4_seq_afinfo);
2414 }
2415 #endif /* CONFIG_PROC_FS */
2416 
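/*
 * The protocol descriptor for IPv4 TCP.  Everything the socket layer
 * needs (connect, sendmsg, setsockopt, memory accounting limits, the
 * request_sock and timewait_sock types, ...) is reached through this
 * table; it is registered with the socket layer from the AF_INET
 * initialisation code.
 */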
2417 struct proto tcp_prot = {
2418 	.name			= "TCP",
2419 	.owner			= THIS_MODULE,
2420 	.close			= tcp_close,
2421 	.connect		= tcp_v4_connect,
2422 	.disconnect		= tcp_disconnect,
2423 	.accept			= inet_csk_accept,
2424 	.ioctl			= tcp_ioctl,
2425 	.init			= tcp_v4_init_sock,
2426 	.destroy		= tcp_v4_destroy_sock,
2427 	.shutdown		= tcp_shutdown,
2428 	.setsockopt		= tcp_setsockopt,
2429 	.getsockopt		= tcp_getsockopt,
2430 	.sendmsg		= tcp_sendmsg,
2431 	.recvmsg		= tcp_recvmsg,
2432 	.backlog_rcv		= tcp_v4_do_rcv,
2433 	.hash			= tcp_v4_hash,
2434 	.unhash			= tcp_unhash,
2435 	.get_port		= tcp_v4_get_port,
2436 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2437 	.sockets_allocated	= &tcp_sockets_allocated,
2438 	.orphan_count		= &tcp_orphan_count,
2439 	.memory_allocated	= &tcp_memory_allocated,
2440 	.memory_pressure	= &tcp_memory_pressure,
2441 	.sysctl_mem		= sysctl_tcp_mem,
2442 	.sysctl_wmem		= sysctl_tcp_wmem,
2443 	.sysctl_rmem		= sysctl_tcp_rmem,
2444 	.max_header		= MAX_TCP_HEADER,
2445 	.obj_size		= sizeof(struct tcp_sock),
2446 	.twsk_prot		= &tcp_timewait_sock_ops,
2447 	.rsk_prot		= &tcp_request_sock_ops,
2448 #ifdef CONFIG_COMPAT
2449 	.compat_setsockopt	= compat_tcp_setsockopt,
2450 	.compat_getsockopt	= compat_tcp_getsockopt,
2451 #endif
2452 };
2453 
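/*
 * Boot-time setup: create the kernel-internal TCP control socket.
 * Failure here is fatal, hence the panic().
 */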
2454 void __init tcp_v4_init(struct net_proto_family *ops)
2455 {
2456 	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
2457 				     IPPROTO_TCP) < 0)
2458 		panic("Failed to create the TCP control socket.\n");
2459 }
2460 
2461 EXPORT_SYMBOL(ipv4_specific);
2462 EXPORT_SYMBOL(tcp_hashinfo);
2463 EXPORT_SYMBOL(tcp_prot);
2464 EXPORT_SYMBOL(tcp_unhash);
2465 EXPORT_SYMBOL(tcp_v4_conn_request);
2466 EXPORT_SYMBOL(tcp_v4_connect);
2467 EXPORT_SYMBOL(tcp_v4_do_rcv);
2468 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2469 EXPORT_SYMBOL(tcp_v4_send_check);
2470 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2471 
2472 #ifdef CONFIG_PROC_FS
2473 EXPORT_SYMBOL(tcp_proc_register);
2474 EXPORT_SYMBOL(tcp_proc_unregister);
2475 #endif
2476 EXPORT_SYMBOL(sysctl_local_port_range);
2477 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2478 
2479