xref: /linux/net/ipv4/tcp_ipv4.c (revision 354faf0977397cea382e9e9c214fd028377d277b)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9  *
10  *		IPv4 specific functions
11  *
12  *
13  *		code split from:
14  *		linux/ipv4/tcp.c
15  *		linux/ipv4/tcp_input.c
16  *		linux/ipv4/tcp_output.c
17  *
18  *		See tcp.c for author information
19  *
20  *	This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  */
25 
26 /*
27  * Changes:
28  *		David S. Miller	:	New socket lookup architecture.
29  *					This code is dedicated to John Dyson.
30  *		David S. Miller :	Change semantics of established hash,
31  *					half is devoted to TIME_WAIT sockets
32  *					and the rest go in the other half.
33  *		Andi Kleen :		Add support for syncookies and fixed
34  *					some bugs: ip options weren't passed to
35  *					the TCP layer, missed a check for an
36  *					ACK bit.
37  *		Andi Kleen :		Implemented fast path mtu discovery.
38  *	     				Fixed many serious bugs in the
39  *					request_sock handling and moved
40  *					most of it into the af independent code.
41  *					Added tail drop and some other bugfixes.
42  *					Added new listen semantics.
43  *		Mike McLagan	:	Routing by source
44  *	Juan Jose Ciarlante:		ip_dynaddr bits
45  *		Andi Kleen:		various fixes.
46  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
47  *					coma.
48  *	Andi Kleen		:	Fix new listen.
49  *	Andi Kleen		:	Fix accept error reporting.
50  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
51  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
52  *					a single port at the same time.
53  */
54 
55 
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 
65 #include <net/net_namespace.h>
66 #include <net/icmp.h>
67 #include <net/inet_hashtables.h>
68 #include <net/tcp.h>
69 #include <net/transp_v6.h>
70 #include <net/ipv6.h>
71 #include <net/inet_common.h>
72 #include <net/timewait_sock.h>
73 #include <net/xfrm.h>
74 #include <net/netdma.h>
75 
76 #include <linux/inet.h>
77 #include <linux/ipv6.h>
78 #include <linux/stddef.h>
79 #include <linux/proc_fs.h>
80 #include <linux/seq_file.h>
81 
82 #include <linux/crypto.h>
83 #include <linux/scatterlist.h>
84 
85 int sysctl_tcp_tw_reuse __read_mostly;
86 int sysctl_tcp_low_latency __read_mostly;
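/* Roughly: tcp_tw_reuse allows a new outgoing connection to take over a
 * TIME-WAIT socket when timestamps show it is safe (see tcp_twsk_unique()
 * below); tcp_low_latency tells the receive path to skip the prequeue and
 * process segments directly, trading some throughput for latency.
 */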
87 
88 /* Check TCP sequence numbers in ICMP packets. */
89 #define ICMP_MIN_LENGTH 8
90 
91 /* Socket used for sending RSTs */
92 static struct socket *tcp_socket __read_mostly;
93 
94 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
95 
96 #ifdef CONFIG_TCP_MD5SIG
97 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
98 						   __be32 addr);
99 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
100 				   __be32 saddr, __be32 daddr,
101 				   struct tcphdr *th, int protocol,
102 				   int tcplen);
103 #endif
104 
105 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
106 	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
107 	.lhash_users = ATOMIC_INIT(0),
108 	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
109 };
110 
111 static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
112 {
113 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
114 				 inet_csk_bind_conflict);
115 }
116 
117 static void tcp_v4_hash(struct sock *sk)
118 {
119 	inet_hash(&tcp_hashinfo, sk);
120 }
121 
122 void tcp_unhash(struct sock *sk)
123 {
124 	inet_unhash(&tcp_hashinfo, sk);
125 }
126 
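/* The initial sequence number is essentially a keyed hash of the connection
 * 4-tuple plus a clock component (in the spirit of RFC 1948), so each
 * connection gets a hard-to-predict, steadily advancing ISN.
 */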
127 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
128 {
129 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
130 					  ip_hdr(skb)->saddr,
131 					  tcp_hdr(skb)->dest,
132 					  tcp_hdr(skb)->source);
133 }
134 
135 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
136 {
137 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
138 	struct tcp_sock *tp = tcp_sk(sk);
139 
140 	/* With PAWS, it is safe from the viewpoint
141 	   of data integrity. Even without PAWS it is safe provided sequence
142 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
143 
144 	   Actually, the idea is close to VJ's, only the timestamp cache is
145 	   held not per host, but per port pair, and the TW bucket is used as
146 	   the state holder.
147 
148 	   If the TW bucket has already been destroyed we fall back to VJ's
149 	   scheme and use the initial timestamp retrieved from the peer table.
150 	 */
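	/* Reuse is allowed only if the TIME-WAIT socket recorded timestamps
	 * and either the caller does not need uniqueness (twp == NULL) or
	 * tcp_tw_reuse is set and the last timestamp is over a second old.
	 * write_seq is then placed well beyond the old send window so the
	 * two incarnations cannot be confused.
	 */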
151 	if (tcptw->tw_ts_recent_stamp &&
152 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
153 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
154 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
155 		if (tp->write_seq == 0)
156 			tp->write_seq = 1;
157 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
158 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
159 		sock_hold(sktw);
160 		return 1;
161 	}
162 
163 	return 0;
164 }
165 
166 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
167 
168 /* This will initiate an outgoing connection. */
169 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
170 {
171 	struct inet_sock *inet = inet_sk(sk);
172 	struct tcp_sock *tp = tcp_sk(sk);
173 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
174 	struct rtable *rt;
175 	__be32 daddr, nexthop;
176 	int tmp;
177 	int err;
178 
179 	if (addr_len < sizeof(struct sockaddr_in))
180 		return -EINVAL;
181 
182 	if (usin->sin_family != AF_INET)
183 		return -EAFNOSUPPORT;
184 
185 	nexthop = daddr = usin->sin_addr.s_addr;
186 	if (inet->opt && inet->opt->srr) {
187 		if (!daddr)
188 			return -EINVAL;
189 		nexthop = inet->opt->faddr;
190 	}
191 
192 	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
193 			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
194 			       IPPROTO_TCP,
195 			       inet->sport, usin->sin_port, sk, 1);
196 	if (tmp < 0) {
197 		if (tmp == -ENETUNREACH)
198 			IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
199 		return tmp;
200 	}
201 
202 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
203 		ip_rt_put(rt);
204 		return -ENETUNREACH;
205 	}
206 
207 	if (!inet->opt || !inet->opt->srr)
208 		daddr = rt->rt_dst;
209 
210 	if (!inet->saddr)
211 		inet->saddr = rt->rt_src;
212 	inet->rcv_saddr = inet->saddr;
213 
214 	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
215 		/* Reset inherited state */
216 		tp->rx_opt.ts_recent	   = 0;
217 		tp->rx_opt.ts_recent_stamp = 0;
218 		tp->write_seq		   = 0;
219 	}
220 
221 	if (tcp_death_row.sysctl_tw_recycle &&
222 	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
223 		struct inet_peer *peer = rt_get_peer(rt);
224 		/*
225 		 * VJ's idea. We save last timestamp seen from
226 		 * the destination in peer table, when entering state
227 		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
228 		 * when trying new connection.
229 		 */
230 		if (peer != NULL &&
231 		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
232 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
233 			tp->rx_opt.ts_recent = peer->tcp_ts;
234 		}
235 	}
236 
237 	inet->dport = usin->sin_port;
238 	inet->daddr = daddr;
239 
240 	inet_csk(sk)->icsk_ext_hdr_len = 0;
241 	if (inet->opt)
242 		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
243 
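	/* 536 is the historic default MSS: the 576 byte minimum reassembly
	 * buffer every host must accept (RFC 1122) minus 40 bytes of IPv4
	 * and TCP headers.
	 */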
244 	tp->rx_opt.mss_clamp = 536;
245 
246 	/* Socket identity is still unknown (sport may be zero).
247 	 * However we set the state to SYN-SENT and, without releasing the
248 	 * socket lock, select a source port, enter ourselves into the hash tables and
249 	 * complete initialization after this.
250 	 */
251 	tcp_set_state(sk, TCP_SYN_SENT);
252 	err = inet_hash_connect(&tcp_death_row, sk);
253 	if (err)
254 		goto failure;
255 
256 	err = ip_route_newports(&rt, IPPROTO_TCP,
257 				inet->sport, inet->dport, sk);
258 	if (err)
259 		goto failure;
260 
261 	/* OK, now commit destination to socket.  */
262 	sk->sk_gso_type = SKB_GSO_TCPV4;
263 	sk_setup_caps(sk, &rt->u.dst);
264 
265 	if (!tp->write_seq)
266 		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
267 							   inet->daddr,
268 							   inet->sport,
269 							   usin->sin_port);
270 
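	/* Seed the IP ID counter from per-connection state so successive
	 * connections do not start counting from the same value.
	 */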
271 	inet->id = tp->write_seq ^ jiffies;
272 
273 	err = tcp_connect(sk);
274 	rt = NULL;
275 	if (err)
276 		goto failure;
277 
278 	return 0;
279 
280 failure:
281 	/*
282 	 * This unhashes the socket and releases the local port,
283 	 * if necessary.
284 	 */
285 	tcp_set_state(sk, TCP_CLOSE);
286 	ip_rt_put(rt);
287 	sk->sk_route_caps = 0;
288 	inet->dport = 0;
289 	return err;
290 }
291 
292 /*
293  * This routine does path mtu discovery as defined in RFC1191.
294  */
295 static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
296 {
297 	struct dst_entry *dst;
298 	struct inet_sock *inet = inet_sk(sk);
299 
300 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
301 	 * sent out by Linux are always < 576 bytes so they should go through
302 	 * unfragmented).
303 	 */
304 	if (sk->sk_state == TCP_LISTEN)
305 		return;
306 
307 	/* We don't check in the dst entry if pmtu discovery is forbidden
308 	 * on this route. We just assume that no packet-too-big packets
309 	 * are sent back when pmtu discovery is not active.
310 	 * There is a small race when the user changes this flag in the
311 	 * route, but I think that's acceptable.
312 	 */
313 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
314 		return;
315 
316 	dst->ops->update_pmtu(dst, mtu);
317 
318 	/* Something is about to go wrong... Remember the soft error
319 	 * for the case that this connection is not able to recover.
320 	 */
321 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
322 		sk->sk_err_soft = EMSGSIZE;
323 
324 	mtu = dst_mtu(dst);
325 
326 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
327 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
328 		tcp_sync_mss(sk, mtu);
329 
330 		/* Resend the TCP packet because it's
331 		 * clear that the old packet has been
332 		 * dropped. This is the new "fast" path mtu
333 		 * discovery.
334 		 */
335 		tcp_simple_retransmit(sk);
336 	} /* else let the usual retransmit timer handle it */
337 }
338 
339 /*
340  * This routine is called by the ICMP module when it gets some
341  * sort of error condition.  If err < 0 then the socket should
342  * be closed and the error returned to the user.  If err > 0
343  * it's just the icmp type << 8 | icmp code.  After adjustment
344  * header points to the first 8 bytes of the tcp header.  We need
345  * to find the appropriate port.
346  *
347  * The locking strategy used here is very "optimistic". When
348  * someone else accesses the socket the ICMP is just dropped
349  * and for some paths there is no check at all.
350  * A more general error queue to queue errors for later handling
351  * is probably better.
352  *
353  */
354 
355 void tcp_v4_err(struct sk_buff *skb, u32 info)
356 {
357 	struct iphdr *iph = (struct iphdr *)skb->data;
358 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
359 	struct tcp_sock *tp;
360 	struct inet_sock *inet;
361 	const int type = icmp_hdr(skb)->type;
362 	const int code = icmp_hdr(skb)->code;
363 	struct sock *sk;
364 	__u32 seq;
365 	int err;
366 
367 	if (skb->len < (iph->ihl << 2) + 8) {
368 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
369 		return;
370 	}
371 
372 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
373 			 th->source, inet_iif(skb));
374 	if (!sk) {
375 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
376 		return;
377 	}
378 	if (sk->sk_state == TCP_TIME_WAIT) {
379 		inet_twsk_put(inet_twsk(sk));
380 		return;
381 	}
382 
383 	bh_lock_sock(sk);
384 	/* If too many ICMPs get dropped on busy
385 	 * servers this needs to be solved differently.
386 	 */
387 	if (sock_owned_by_user(sk))
388 		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
389 
390 	if (sk->sk_state == TCP_CLOSE)
391 		goto out;
392 
393 	tp = tcp_sk(sk);
394 	seq = ntohl(th->seq);
395 	if (sk->sk_state != TCP_LISTEN &&
396 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
397 		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
398 		goto out;
399 	}
400 
401 	switch (type) {
402 	case ICMP_SOURCE_QUENCH:
403 		/* Just silently ignore these. */
404 		goto out;
405 	case ICMP_PARAMETERPROB:
406 		err = EPROTO;
407 		break;
408 	case ICMP_DEST_UNREACH:
409 		if (code > NR_ICMP_UNREACH)
410 			goto out;
411 
412 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
413 			if (!sock_owned_by_user(sk))
414 				do_pmtu_discovery(sk, iph, info);
415 			goto out;
416 		}
417 
418 		err = icmp_err_convert[code].errno;
419 		break;
420 	case ICMP_TIME_EXCEEDED:
421 		err = EHOSTUNREACH;
422 		break;
423 	default:
424 		goto out;
425 	}
426 
427 	switch (sk->sk_state) {
428 		struct request_sock *req, **prev;
429 	case TCP_LISTEN:
430 		if (sock_owned_by_user(sk))
431 			goto out;
432 
433 		req = inet_csk_search_req(sk, &prev, th->dest,
434 					  iph->daddr, iph->saddr);
435 		if (!req)
436 			goto out;
437 
438 		/* ICMPs are not backlogged, hence we cannot get
439 		   an established socket here.
440 		 */
441 		BUG_TRAP(!req->sk);
442 
443 		if (seq != tcp_rsk(req)->snt_isn) {
444 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
445 			goto out;
446 		}
447 
448 		/*
449 		 * Still in SYN_RECV, just remove it silently.
450 		 * There is no good way to pass the error to the newly
451 		 * created socket, and POSIX does not want network
452 		 * errors returned from accept().
453 		 */
454 		inet_csk_reqsk_queue_drop(sk, req, prev);
455 		goto out;
456 
457 	case TCP_SYN_SENT:
458 	case TCP_SYN_RECV:  /* Cannot happen.
459 			       It can, f.e., happen if SYNs crossed.
460 			     */
461 		if (!sock_owned_by_user(sk)) {
462 			sk->sk_err = err;
463 
464 			sk->sk_error_report(sk);
465 
466 			tcp_done(sk);
467 		} else {
468 			sk->sk_err_soft = err;
469 		}
470 		goto out;
471 	}
472 
473 	/* If we've already connected we will keep trying
474 	 * until we time out, or the user gives up.
475 	 *
476 	 * rfc1122 4.2.3.9 allows us to consider as hard errors
477 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
478 	 * but it is obsoleted by pmtu discovery).
479 	 *
480 	 * Note that in the modern internet, where routing is unreliable
481 	 * and broken firewalls sit in every dark corner sending random
482 	 * errors ordered by their masters, even these two messages finally lose
483 	 * their original sense (even Linux sends invalid PORT_UNREACHs).
484 	 *
485 	 * Now we are in compliance with RFCs.
486 	 *							--ANK (980905)
487 	 */
488 
489 	inet = inet_sk(sk);
490 	if (!sock_owned_by_user(sk) && inet->recverr) {
491 		sk->sk_err = err;
492 		sk->sk_error_report(sk);
493 	} else	{ /* Only an error on timeout */
494 		sk->sk_err_soft = err;
495 	}
496 
497 out:
498 	bh_unlock_sock(sk);
499 	sock_put(sk);
500 }
501 
502 /* This routine computes an IPv4 TCP checksum. */
503 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
504 {
505 	struct inet_sock *inet = inet_sk(sk);
506 	struct tcphdr *th = tcp_hdr(skb);
507 
508 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
509 		th->check = ~tcp_v4_check(len, inet->saddr,
510 					  inet->daddr, 0);
511 		skb->csum_start = skb_transport_header(skb) - skb->head;
512 		skb->csum_offset = offsetof(struct tcphdr, check);
513 	} else {
514 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
515 					 csum_partial((char *)th,
516 						      th->doff << 2,
517 						      skb->csum));
518 	}
519 }
520 
521 int tcp_v4_gso_send_check(struct sk_buff *skb)
522 {
523 	const struct iphdr *iph;
524 	struct tcphdr *th;
525 
526 	if (!pskb_may_pull(skb, sizeof(*th)))
527 		return -EINVAL;
528 
529 	iph = ip_hdr(skb);
530 	th = tcp_hdr(skb);
531 
532 	th->check = 0;
533 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
534 	skb->csum_start = skb_transport_header(skb) - skb->head;
535 	skb->csum_offset = offsetof(struct tcphdr, check);
536 	skb->ip_summed = CHECKSUM_PARTIAL;
537 	return 0;
538 }
539 
540 /*
541  *	This routine will send an RST to the other tcp.
542  *
543  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
544  *		      for the reset?
545  *	Answer: if a packet caused an RST, it is not for a socket
546  *		existing in our system; if it is matched to a socket,
547  *		it is just a duplicate segment or a bug in the other side's TCP.
548  *		So we build the reply based only on the parameters
549  *		that arrived with the segment.
550  *	Exception: precedence violation. We do not implement it in any case.
551  */
552 
553 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
554 {
555 	struct tcphdr *th = tcp_hdr(skb);
556 	struct {
557 		struct tcphdr th;
558 #ifdef CONFIG_TCP_MD5SIG
559 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
560 #endif
561 	} rep;
562 	struct ip_reply_arg arg;
563 #ifdef CONFIG_TCP_MD5SIG
564 	struct tcp_md5sig_key *key;
565 #endif
566 
567 	/* Never send a reset in response to a reset. */
568 	if (th->rst)
569 		return;
570 
571 	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
572 		return;
573 
574 	/* Swap the send and the receive. */
575 	memset(&rep, 0, sizeof(rep));
576 	rep.th.dest   = th->source;
577 	rep.th.source = th->dest;
578 	rep.th.doff   = sizeof(struct tcphdr) / 4;
579 	rep.th.rst    = 1;
580 
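	/* Per RFC 793: if the offending segment carried an ACK, the RST takes
	 * its sequence number from that ACK; otherwise the RST acknowledges
	 * everything the segment occupied in sequence space.
	 */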
581 	if (th->ack) {
582 		rep.th.seq = th->ack_seq;
583 	} else {
584 		rep.th.ack = 1;
585 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
586 				       skb->len - (th->doff << 2));
587 	}
588 
589 	memset(&arg, 0, sizeof(arg));
590 	arg.iov[0].iov_base = (unsigned char *)&rep;
591 	arg.iov[0].iov_len  = sizeof(rep.th);
592 
593 #ifdef CONFIG_TCP_MD5SIG
594 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
595 	if (key) {
596 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
597 				   (TCPOPT_NOP << 16) |
598 				   (TCPOPT_MD5SIG << 8) |
599 				   TCPOLEN_MD5SIG);
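		/* Two NOPs align the MD5 option (kind 19, length 18, RFC 2385)
		 * to a 32-bit boundary; the digest itself is written into
		 * rep.opt[1..] below.
		 */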
600 		/* Update length and the length the header thinks exists */
601 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
602 		rep.th.doff = arg.iov[0].iov_len / 4;
603 
604 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
605 					key,
606 					ip_hdr(skb)->daddr,
607 					ip_hdr(skb)->saddr,
608 					&rep.th, IPPROTO_TCP,
609 					arg.iov[0].iov_len);
610 	}
611 #endif
612 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
613 				      ip_hdr(skb)->saddr, /* XXX */
614 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
615 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
616 
617 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
618 
619 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
620 	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
621 }
622 
623 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
624    outside of socket context, is certainly ugly. What can I do?
625  */
626 
627 static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
628 			    struct sk_buff *skb, u32 seq, u32 ack,
629 			    u32 win, u32 ts)
630 {
631 	struct tcphdr *th = tcp_hdr(skb);
632 	struct {
633 		struct tcphdr th;
634 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
635 #ifdef CONFIG_TCP_MD5SIG
636 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
637 #endif
638 			];
639 	} rep;
640 	struct ip_reply_arg arg;
641 #ifdef CONFIG_TCP_MD5SIG
642 	struct tcp_md5sig_key *key;
643 	struct tcp_md5sig_key tw_key;
644 #endif
645 
646 	memset(&rep.th, 0, sizeof(struct tcphdr));
647 	memset(&arg, 0, sizeof(arg));
648 
649 	arg.iov[0].iov_base = (unsigned char *)&rep;
650 	arg.iov[0].iov_len  = sizeof(rep.th);
651 	if (ts) {
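		/* Timestamp option: two NOPs for alignment, then kind 8 and
		 * length 10, followed by TSval (our clock) and TSecr (the
		 * peer's echoed timestamp).
		 */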
652 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
653 				   (TCPOPT_TIMESTAMP << 8) |
654 				   TCPOLEN_TIMESTAMP);
655 		rep.opt[1] = htonl(tcp_time_stamp);
656 		rep.opt[2] = htonl(ts);
657 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
658 	}
659 
660 	/* Swap the send and the receive. */
661 	rep.th.dest    = th->source;
662 	rep.th.source  = th->dest;
663 	rep.th.doff    = arg.iov[0].iov_len / 4;
664 	rep.th.seq     = htonl(seq);
665 	rep.th.ack_seq = htonl(ack);
666 	rep.th.ack     = 1;
667 	rep.th.window  = htons(win);
668 
669 #ifdef CONFIG_TCP_MD5SIG
670 	/*
671 	 * The SKB holds an incoming packet, but may not have a valid ->sk
672 	 * pointer. This is especially the case when we're dealing with a
673 	 * TIME_WAIT ack, because the sk structure is long gone, and only
674 	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
675 	 * structure, and we use it in preference.  I believe that (twsk ||
676 	 * skb->sk) holds true, but we program defensively.
677 	 */
678 	if (!twsk && skb->sk) {
679 		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
680 	} else if (twsk && twsk->tw_md5_keylen) {
681 		tw_key.key = twsk->tw_md5_key;
682 		tw_key.keylen = twsk->tw_md5_keylen;
683 		key = &tw_key;
684 	} else
685 		key = NULL;
686 
687 	if (key) {
688 		int offset = (ts) ? 3 : 0;
689 
690 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
691 					  (TCPOPT_NOP << 16) |
692 					  (TCPOPT_MD5SIG << 8) |
693 					  TCPOLEN_MD5SIG);
694 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
695 		rep.th.doff = arg.iov[0].iov_len/4;
696 
697 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
698 					key,
699 					ip_hdr(skb)->daddr,
700 					ip_hdr(skb)->saddr,
701 					&rep.th, IPPROTO_TCP,
702 					arg.iov[0].iov_len);
703 	}
704 #endif
705 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
706 				      ip_hdr(skb)->saddr, /* XXX */
707 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
708 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
709 	if (twsk)
710 		arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
711 
712 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
713 
714 	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
715 }
716 
717 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
718 {
719 	struct inet_timewait_sock *tw = inet_twsk(sk);
720 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
721 
722 	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
723 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
724 			tcptw->tw_ts_recent);
725 
726 	inet_twsk_put(tw);
727 }
728 
729 static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
730 				  struct request_sock *req)
731 {
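	/* Each side's SYN consumes one sequence number, hence the "+ 1" on
	 * both ISNs when acking on behalf of a request sock.
	 */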
732 	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
733 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
734 			req->ts_recent);
735 }
736 
737 /*
738  *	Send a SYN-ACK after having received an ACK.
739  *	This still operates on a request_sock only, not on a big
740  *	socket.
741  */
742 static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
743 			      struct dst_entry *dst)
744 {
745 	const struct inet_request_sock *ireq = inet_rsk(req);
746 	int err = -1;
747 	struct sk_buff * skb;
748 
749 	/* First, grab a route. */
750 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
751 		goto out;
752 
753 	skb = tcp_make_synack(sk, dst, req);
754 
755 	if (skb) {
756 		struct tcphdr *th = tcp_hdr(skb);
757 
758 		th->check = tcp_v4_check(skb->len,
759 					 ireq->loc_addr,
760 					 ireq->rmt_addr,
761 					 csum_partial((char *)th, skb->len,
762 						      skb->csum));
763 
764 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
765 					    ireq->rmt_addr,
766 					    ireq->opt);
767 		err = net_xmit_eval(err);
768 	}
769 
770 out:
771 	dst_release(dst);
772 	return err;
773 }
774 
775 /*
776  *	IPv4 request_sock destructor.
777  */
778 static void tcp_v4_reqsk_destructor(struct request_sock *req)
779 {
780 	kfree(inet_rsk(req)->opt);
781 }
782 
783 #ifdef CONFIG_SYN_COOKIES
784 static void syn_flood_warning(struct sk_buff *skb)
785 {
786 	static unsigned long warntime;
787 
788 	if (time_after(jiffies, (warntime + HZ * 60))) {
789 		warntime = jiffies;
790 		printk(KERN_INFO
791 		       "possible SYN flooding on port %d. Sending cookies.\n",
792 		       ntohs(tcp_hdr(skb)->dest));
793 	}
794 }
795 #endif
796 
797 /*
798  * Save and compile IPv4 options into the request_sock if needed.
799  */
800 static struct ip_options *tcp_v4_save_options(struct sock *sk,
801 					      struct sk_buff *skb)
802 {
803 	struct ip_options *opt = &(IPCB(skb)->opt);
804 	struct ip_options *dopt = NULL;
805 
806 	if (opt && opt->optlen) {
807 		int opt_size = optlength(opt);
808 		dopt = kmalloc(opt_size, GFP_ATOMIC);
809 		if (dopt) {
810 			if (ip_options_echo(dopt, skb)) {
811 				kfree(dopt);
812 				dopt = NULL;
813 			}
814 		}
815 	}
816 	return dopt;
817 }
818 
819 #ifdef CONFIG_TCP_MD5SIG
820 /*
821  * RFC2385 MD5 checksumming requires a mapping of
822  * IP address->MD5 Key.
823  * We need to maintain these in the sk structure.
824  */
825 
826 /* Find the Key structure for an address.  */
827 static struct tcp_md5sig_key *
828 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
829 {
830 	struct tcp_sock *tp = tcp_sk(sk);
831 	int i;
832 
833 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
834 		return NULL;
835 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
836 		if (tp->md5sig_info->keys4[i].addr == addr)
837 			return &tp->md5sig_info->keys4[i].base;
838 	}
839 	return NULL;
840 }
841 
842 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
843 					 struct sock *addr_sk)
844 {
845 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
846 }
847 
848 EXPORT_SYMBOL(tcp_v4_md5_lookup);
849 
850 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
851 						      struct request_sock *req)
852 {
853 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
854 }
855 
856 /* This can be called on a newly created socket, from other files */
857 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
858 		      u8 *newkey, u8 newkeylen)
859 {
860 	/* Add Key to the list */
861 	struct tcp_md5sig_key *key;
862 	struct tcp_sock *tp = tcp_sk(sk);
863 	struct tcp4_md5sig_key *keys;
864 
865 	key = tcp_v4_md5_do_lookup(sk, addr);
866 	if (key) {
867 		/* Pre-existing entry - just update that one. */
868 		kfree(key->key);
869 		key->key = newkey;
870 		key->keylen = newkeylen;
871 	} else {
872 		struct tcp_md5sig_info *md5sig;
873 
874 		if (!tp->md5sig_info) {
875 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
876 						  GFP_ATOMIC);
877 			if (!tp->md5sig_info) {
878 				kfree(newkey);
879 				return -ENOMEM;
880 			}
881 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
882 		}
883 		if (tcp_alloc_md5sig_pool() == NULL) {
884 			kfree(newkey);
885 			return -ENOMEM;
886 		}
887 		md5sig = tp->md5sig_info;
888 
889 		if (md5sig->alloced4 == md5sig->entries4) {
890 			keys = kmalloc((sizeof(*keys) *
891 					(md5sig->entries4 + 1)), GFP_ATOMIC);
892 			if (!keys) {
893 				kfree(newkey);
894 				tcp_free_md5sig_pool();
895 				return -ENOMEM;
896 			}
897 
898 			if (md5sig->entries4)
899 				memcpy(keys, md5sig->keys4,
900 				       sizeof(*keys) * md5sig->entries4);
901 
902 			/* Free old key list, and reference new one */
903 			kfree(md5sig->keys4);
904 			md5sig->keys4 = keys;
905 			md5sig->alloced4++;
906 		}
907 		md5sig->entries4++;
908 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
909 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
910 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
911 	}
912 	return 0;
913 }
914 
915 EXPORT_SYMBOL(tcp_v4_md5_do_add);
916 
917 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
918 			       u8 *newkey, u8 newkeylen)
919 {
920 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
921 				 newkey, newkeylen);
922 }
923 
924 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
925 {
926 	struct tcp_sock *tp = tcp_sk(sk);
927 	int i;
928 
929 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
930 		if (tp->md5sig_info->keys4[i].addr == addr) {
931 			/* Free the key */
932 			kfree(tp->md5sig_info->keys4[i].base.key);
933 			tp->md5sig_info->entries4--;
934 
935 			if (tp->md5sig_info->entries4 == 0) {
936 				kfree(tp->md5sig_info->keys4);
937 				tp->md5sig_info->keys4 = NULL;
938 				tp->md5sig_info->alloced4 = 0;
939 			} else if (tp->md5sig_info->entries4 != i) {
940 				/* Need to do some manipulation */
941 				memmove(&tp->md5sig_info->keys4[i],
942 					&tp->md5sig_info->keys4[i+1],
943 					(tp->md5sig_info->entries4 - i) *
944 					 sizeof(struct tcp4_md5sig_key));
945 			}
946 			tcp_free_md5sig_pool();
947 			return 0;
948 		}
949 	}
950 	return -ENOENT;
951 }
952 
953 EXPORT_SYMBOL(tcp_v4_md5_do_del);
954 
955 static void tcp_v4_clear_md5_list(struct sock *sk)
956 {
957 	struct tcp_sock *tp = tcp_sk(sk);
958 
959 	/* Free each key, then the set of keys,
960 	 * the crypto element, and then decrement our
961 	 * hold on the last resort crypto.
962 	 */
963 	if (tp->md5sig_info->entries4) {
964 		int i;
965 		for (i = 0; i < tp->md5sig_info->entries4; i++)
966 			kfree(tp->md5sig_info->keys4[i].base.key);
967 		tp->md5sig_info->entries4 = 0;
968 		tcp_free_md5sig_pool();
969 	}
970 	if (tp->md5sig_info->keys4) {
971 		kfree(tp->md5sig_info->keys4);
972 		tp->md5sig_info->keys4 = NULL;
973 		tp->md5sig_info->alloced4  = 0;
974 	}
975 }
976 
977 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
978 				 int optlen)
979 {
980 	struct tcp_md5sig cmd;
981 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
982 	u8 *newkey;
983 
984 	if (optlen < sizeof(cmd))
985 		return -EINVAL;
986 
987 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
988 		return -EFAULT;
989 
990 	if (sin->sin_family != AF_INET)
991 		return -EINVAL;
992 
993 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
994 		if (!tcp_sk(sk)->md5sig_info)
995 			return -ENOENT;
996 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
997 	}
998 
999 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1000 		return -EINVAL;
1001 
1002 	if (!tcp_sk(sk)->md5sig_info) {
1003 		struct tcp_sock *tp = tcp_sk(sk);
1004 		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
1005 
1006 		if (!p)
1007 			return -EINVAL;
1008 
1009 		tp->md5sig_info = p;
1010 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1011 	}
1012 
1013 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
1014 	if (!newkey)
1015 		return -ENOMEM;
1016 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1017 				 newkey, cmd.tcpm_keylen);
1018 }
1019 
1020 static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1021 				   __be32 saddr, __be32 daddr,
1022 				   struct tcphdr *th, int protocol,
1023 				   int tcplen)
1024 {
1025 	struct scatterlist sg[4];
1026 	__u16 data_len;
1027 	int block = 0;
1028 	__sum16 old_checksum;
1029 	struct tcp_md5sig_pool *hp;
1030 	struct tcp4_pseudohdr *bp;
1031 	struct hash_desc *desc;
1032 	int err;
1033 	unsigned int nbytes = 0;
1034 
1035 	/*
1036 	 * Okay, so RFC2385 is turned on for this connection,
1037 	 * so we need to generate the MD5 hash for the packet now.
1038 	 */
1039 
1040 	hp = tcp_get_md5sig_pool();
1041 	if (!hp)
1042 		goto clear_hash_noput;
1043 
1044 	bp = &hp->md5_blk.ip4;
1045 	desc = &hp->md5_desc;
1046 
1047 	/*
1048 	 * 1. the TCP pseudo-header (in the order: source IP address,
1049 	 * destination IP address, zero-padded protocol number, and
1050 	 * segment length)
1051 	 */
1052 	bp->saddr = saddr;
1053 	bp->daddr = daddr;
1054 	bp->pad = 0;
1055 	bp->protocol = protocol;
1056 	bp->len = htons(tcplen);
1057 
1058 	sg_init_table(sg, 4);
1059 
1060 	sg_set_buf(&sg[block++], bp, sizeof(*bp));
1061 	nbytes += sizeof(*bp);
1062 
1063 	/* 2. the TCP header, excluding options, and assuming a
1064 	 * checksum of zero.
1065 	 */
1066 	old_checksum = th->check;
1067 	th->check = 0;
1068 	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1069 	nbytes += sizeof(struct tcphdr);
1070 
1071 	/* 3. the TCP segment data (if any) */
1072 	data_len = tcplen - (th->doff << 2);
1073 	if (data_len > 0) {
1074 		unsigned char *data = (unsigned char *)th + (th->doff << 2);
1075 		sg_set_buf(&sg[block++], data, data_len);
1076 		nbytes += data_len;
1077 	}
1078 
1079 	/* 4. an independently-specified key or password, known to both
1080 	 * TCPs and presumably connection-specific
1081 	 */
1082 	sg_set_buf(&sg[block++], key->key, key->keylen);
1083 	nbytes += key->keylen;
1084 
1085 	sg_mark_end(&sg[block - 1]);
1086 
1087 	/* Now store the Hash into the packet */
1088 	err = crypto_hash_init(desc);
1089 	if (err)
1090 		goto clear_hash;
1091 	err = crypto_hash_update(desc, sg, nbytes);
1092 	if (err)
1093 		goto clear_hash;
1094 	err = crypto_hash_final(desc, md5_hash);
1095 	if (err)
1096 		goto clear_hash;
1097 
1098 	/* Reset header, and free up the crypto */
1099 	tcp_put_md5sig_pool();
1100 	th->check = old_checksum;
1101 
1102 out:
1103 	return 0;
1104 clear_hash:
1105 	tcp_put_md5sig_pool();
1106 clear_hash_noput:
1107 	memset(md5_hash, 0, 16);
1108 	goto out;
1109 }
1110 
1111 int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1112 			 struct sock *sk,
1113 			 struct dst_entry *dst,
1114 			 struct request_sock *req,
1115 			 struct tcphdr *th, int protocol,
1116 			 int tcplen)
1117 {
1118 	__be32 saddr, daddr;
1119 
1120 	if (sk) {
1121 		saddr = inet_sk(sk)->saddr;
1122 		daddr = inet_sk(sk)->daddr;
1123 	} else {
1124 		struct rtable *rt = (struct rtable *)dst;
1125 		BUG_ON(!rt);
1126 		saddr = rt->rt_src;
1127 		daddr = rt->rt_dst;
1128 	}
1129 	return tcp_v4_do_calc_md5_hash(md5_hash, key,
1130 				       saddr, daddr,
1131 				       th, protocol, tcplen);
1132 }
1133 
1134 EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
1135 
1136 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1137 {
1138 	/*
1139 	 * This gets called for each TCP segment that arrives
1140 	 * so we want to be efficient.
1141 	 * We have 3 drop cases:
1142 	 * o No MD5 hash and one expected.
1143 	 * o MD5 hash and we're not expecting one.
1144 	 * o MD5 hash and it's wrong.
1145 	 */
1146 	__u8 *hash_location = NULL;
1147 	struct tcp_md5sig_key *hash_expected;
1148 	const struct iphdr *iph = ip_hdr(skb);
1149 	struct tcphdr *th = tcp_hdr(skb);
1150 	int length = (th->doff << 2) - sizeof(struct tcphdr);
1151 	int genhash;
1152 	unsigned char *ptr;
1153 	unsigned char newhash[16];
1154 
1155 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1156 
1157 	/*
1158 	 * If the TCP option length is less than the TCP_MD5SIG
1159 	 * option length, then we can shortcut
1160 	 */
1161 	if (length < TCPOLEN_MD5SIG) {
1162 		if (hash_expected)
1163 			return 1;
1164 		else
1165 			return 0;
1166 	}
1167 
1168 	/* Okay, we can't shortcut - we have to grub through the options */
1169 	ptr = (unsigned char *)(th + 1);
1170 	while (length > 0) {
1171 		int opcode = *ptr++;
1172 		int opsize;
1173 
1174 		switch (opcode) {
1175 		case TCPOPT_EOL:
1176 			goto done_opts;
1177 		case TCPOPT_NOP:
1178 			length--;
1179 			continue;
1180 		default:
1181 			opsize = *ptr++;
1182 			if (opsize < 2)
1183 				goto done_opts;
1184 			if (opsize > length)
1185 				goto done_opts;
1186 
1187 			if (opcode == TCPOPT_MD5SIG) {
1188 				hash_location = ptr;
1189 				goto done_opts;
1190 			}
1191 		}
1192 		ptr += opsize-2;
1193 		length -= opsize;
1194 	}
1195 done_opts:
1196 	/* We've parsed the options - do we have a hash? */
1197 	if (!hash_expected && !hash_location)
1198 		return 0;
1199 
1200 	if (hash_expected && !hash_location) {
1201 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
1202 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1203 			       NIPQUAD(iph->saddr), ntohs(th->source),
1204 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1205 		return 1;
1206 	}
1207 
1208 	if (!hash_expected && hash_location) {
1209 		LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
1210 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
1211 			       NIPQUAD(iph->saddr), ntohs(th->source),
1212 			       NIPQUAD(iph->daddr), ntohs(th->dest));
1213 		return 1;
1214 	}
1215 
1216 	/* Okay, so this is hash_expected and hash_location -
1217 	 * so we need to calculate the checksum.
1218 	 */
1219 	genhash = tcp_v4_do_calc_md5_hash(newhash,
1220 					  hash_expected,
1221 					  iph->saddr, iph->daddr,
1222 					  th, sk->sk_protocol,
1223 					  skb->len);
1224 
1225 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1226 		if (net_ratelimit()) {
1227 			printk(KERN_INFO "MD5 Hash failed for "
1228 			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
1229 			       NIPQUAD(iph->saddr), ntohs(th->source),
1230 			       NIPQUAD(iph->daddr), ntohs(th->dest),
1231 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1232 		}
1233 		return 1;
1234 	}
1235 	return 0;
1236 }
1237 
1238 #endif
1239 
1240 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1241 	.family		=	PF_INET,
1242 	.obj_size	=	sizeof(struct tcp_request_sock),
1243 	.rtx_syn_ack	=	tcp_v4_send_synack,
1244 	.send_ack	=	tcp_v4_reqsk_send_ack,
1245 	.destructor	=	tcp_v4_reqsk_destructor,
1246 	.send_reset	=	tcp_v4_send_reset,
1247 };
1248 
1249 #ifdef CONFIG_TCP_MD5SIG
1250 static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1251 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1252 };
1253 #endif
1254 
1255 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1256 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1257 	.twsk_unique	= tcp_twsk_unique,
1258 	.twsk_destructor= tcp_twsk_destructor,
1259 };
1260 
1261 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1262 {
1263 	struct inet_request_sock *ireq;
1264 	struct tcp_options_received tmp_opt;
1265 	struct request_sock *req;
1266 	__be32 saddr = ip_hdr(skb)->saddr;
1267 	__be32 daddr = ip_hdr(skb)->daddr;
1268 	__u32 isn = TCP_SKB_CB(skb)->when;
1269 	struct dst_entry *dst = NULL;
1270 #ifdef CONFIG_SYN_COOKIES
1271 	int want_cookie = 0;
1272 #else
1273 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1274 #endif
1275 
1276 	/* Never answer SYNs sent to broadcast or multicast */
1277 	if (((struct rtable *)skb->dst)->rt_flags &
1278 	    (RTCF_BROADCAST | RTCF_MULTICAST))
1279 		goto drop;
1280 
1281 	/* TW buckets are converted to open requests without
1282 	 * limitations, they conserve resources and the peer is
1283 	 * evidently a real one.
1284 	 */
1285 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1286 #ifdef CONFIG_SYN_COOKIES
1287 		if (sysctl_tcp_syncookies) {
1288 			want_cookie = 1;
1289 		} else
1290 #endif
1291 		goto drop;
1292 	}
1293 
1294 	/* Accept backlog is full. If we have already queued enough
1295 	 * warm entries in the syn queue, drop the request. It is better than
1296 	 * clogging the syn queue with openreqs with exponentially increasing
1297 	 * timeout.
1298 	 */
1299 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1300 		goto drop;
1301 
1302 	req = reqsk_alloc(&tcp_request_sock_ops);
1303 	if (!req)
1304 		goto drop;
1305 
1306 #ifdef CONFIG_TCP_MD5SIG
1307 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1308 #endif
1309 
1310 	tcp_clear_options(&tmp_opt);
1311 	tmp_opt.mss_clamp = 536;
1312 	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;
1313 
1314 	tcp_parse_options(skb, &tmp_opt, 0);
1315 
1316 	if (want_cookie) {
1317 		tcp_clear_options(&tmp_opt);
1318 		tmp_opt.saw_tstamp = 0;
1319 	}
1320 
1321 	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1322 		/* Some OSes (unknown ones, but I see them on a web server which
1323 		 * contains information interesting only for windows'
1324 		 * users) do not send their stamp in the SYN. It is an easy case.
1325 		 * We simply do not advertise TS support.
1326 		 */
1327 		tmp_opt.saw_tstamp = 0;
1328 		tmp_opt.tstamp_ok  = 0;
1329 	}
1330 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1331 
1332 	tcp_openreq_init(req, &tmp_opt, skb);
1333 
1334 	if (security_inet_conn_request(sk, skb, req))
1335 		goto drop_and_free;
1336 
1337 	ireq = inet_rsk(req);
1338 	ireq->loc_addr = daddr;
1339 	ireq->rmt_addr = saddr;
1340 	ireq->opt = tcp_v4_save_options(sk, skb);
1341 	if (!want_cookie)
1342 		TCP_ECN_create_request(req, tcp_hdr(skb));
1343 
1344 	if (want_cookie) {
1345 #ifdef CONFIG_SYN_COOKIES
1346 		syn_flood_warning(skb);
1347 #endif
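		/* Syncookies encode the essential connection parameters in
		 * the ISN we send back, so no request state has to be kept
		 * while under a SYN flood.
		 */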
1348 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1349 	} else if (!isn) {
1350 		struct inet_peer *peer = NULL;
1351 
1352 		/* VJ's idea. We save last timestamp seen
1353 		 * from the destination in peer table, when entering
1354 		 * state TIME-WAIT, and check against it before
1355 		 * accepting new connection request.
1356 		 *
1357 		 * If "isn" is not zero, this request hit alive
1358 		 * timewait bucket, so that all the necessary checks
1359 		 * are made in the function processing timewait state.
1360 		 */
1361 		if (tmp_opt.saw_tstamp &&
1362 		    tcp_death_row.sysctl_tw_recycle &&
1363 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1364 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1365 		    peer->v4daddr == saddr) {
1366 			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1367 			    (s32)(peer->tcp_ts - req->ts_recent) >
1368 							TCP_PAWS_WINDOW) {
1369 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1370 				dst_release(dst);
1371 				goto drop_and_free;
1372 			}
1373 		}
1374 		/* Kill the following clause, if you dislike this way. */
1375 		else if (!sysctl_tcp_syncookies &&
1376 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1377 			  (sysctl_max_syn_backlog >> 2)) &&
1378 			 (!peer || !peer->tcp_ts_stamp) &&
1379 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1380 			/* Without syncookies the last quarter of the
1381 			 * backlog is filled with destinations
1382 			 * proven to be alive.
1383 			 * It means that we continue to communicate
1384 			 * with destinations already remembered
1385 			 * at the moment of the synflood.
1386 			 */
1387 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1388 				       "request from %u.%u.%u.%u/%u\n",
1389 				       NIPQUAD(saddr),
1390 				       ntohs(tcp_hdr(skb)->source));
1391 			dst_release(dst);
1392 			goto drop_and_free;
1393 		}
1394 
1395 		isn = tcp_v4_init_sequence(skb);
1396 	}
1397 	tcp_rsk(req)->snt_isn = isn;
1398 
1399 	if (tcp_v4_send_synack(sk, req, dst))
1400 		goto drop_and_free;
1401 
1402 	if (want_cookie) {
1403 		reqsk_free(req);
1404 	} else {
1405 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1406 	}
1407 	return 0;
1408 
1409 drop_and_free:
1410 	reqsk_free(req);
1411 drop:
1412 	return 0;
1413 }
1414 
1415 
1416 /*
1417  * The three way handshake has completed - we got a valid synack -
1418  * now create the new socket.
1419  */
1420 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1421 				  struct request_sock *req,
1422 				  struct dst_entry *dst)
1423 {
1424 	struct inet_request_sock *ireq;
1425 	struct inet_sock *newinet;
1426 	struct tcp_sock *newtp;
1427 	struct sock *newsk;
1428 #ifdef CONFIG_TCP_MD5SIG
1429 	struct tcp_md5sig_key *key;
1430 #endif
1431 
1432 	if (sk_acceptq_is_full(sk))
1433 		goto exit_overflow;
1434 
1435 	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1436 		goto exit;
1437 
1438 	newsk = tcp_create_openreq_child(sk, req, skb);
1439 	if (!newsk)
1440 		goto exit;
1441 
1442 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1443 	sk_setup_caps(newsk, dst);
1444 
1445 	newtp		      = tcp_sk(newsk);
1446 	newinet		      = inet_sk(newsk);
1447 	ireq		      = inet_rsk(req);
1448 	newinet->daddr	      = ireq->rmt_addr;
1449 	newinet->rcv_saddr    = ireq->loc_addr;
1450 	newinet->saddr	      = ireq->loc_addr;
1451 	newinet->opt	      = ireq->opt;
1452 	ireq->opt	      = NULL;
1453 	newinet->mc_index     = inet_iif(skb);
1454 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1455 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1456 	if (newinet->opt)
1457 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1458 	newinet->id = newtp->write_seq ^ jiffies;
1459 
1460 	tcp_mtup_init(newsk);
1461 	tcp_sync_mss(newsk, dst_mtu(dst));
1462 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1463 	tcp_initialize_rcv_mss(newsk);
1464 
1465 #ifdef CONFIG_TCP_MD5SIG
1466 	/* Copy over the MD5 key from the original socket */
1467 	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
1468 		/*
1469 		 * We're using one, so create a matching key
1470 		 * on the newsk structure. If we fail to get
1471 		 * memory, then we end up not copying the key
1472 		 * across. Shucks.
1473 		 */
1474 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1475 		if (newkey != NULL)
1476 			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1477 					  newkey, key->keylen);
1478 	}
1479 #endif
1480 
1481 	__inet_hash(&tcp_hashinfo, newsk, 0);
1482 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
1483 
1484 	return newsk;
1485 
1486 exit_overflow:
1487 	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1488 exit:
1489 	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1490 	dst_release(dst);
1491 	return NULL;
1492 }
1493 
1494 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1495 {
1496 	struct tcphdr *th = tcp_hdr(skb);
1497 	const struct iphdr *iph = ip_hdr(skb);
1498 	struct sock *nsk;
1499 	struct request_sock **prev;
1500 	/* Find possible connection requests. */
1501 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1502 						       iph->saddr, iph->daddr);
1503 	if (req)
1504 		return tcp_check_req(sk, skb, req, prev);
1505 
1506 	nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
1507 				      iph->daddr, th->dest, inet_iif(skb));
1508 
1509 	if (nsk) {
1510 		if (nsk->sk_state != TCP_TIME_WAIT) {
1511 			bh_lock_sock(nsk);
1512 			return nsk;
1513 		}
1514 		inet_twsk_put(inet_twsk(nsk));
1515 		return NULL;
1516 	}
1517 
1518 #ifdef CONFIG_SYN_COOKIES
1519 	if (!th->rst && !th->syn && th->ack)
1520 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1521 #endif
1522 	return sk;
1523 }
1524 
1525 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1526 {
1527 	const struct iphdr *iph = ip_hdr(skb);
1528 
1529 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1530 		if (!tcp_v4_check(skb->len, iph->saddr,
1531 				  iph->daddr, skb->csum)) {
1532 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1533 			return 0;
1534 		}
1535 	}
1536 
1537 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1538 				       skb->len, IPPROTO_TCP, 0);
1539 
1540 	if (skb->len <= 76) {
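		/* Short segments are cheap to verify right away; larger ones
		 * keep the pseudo-header sum set above and are checksummed
		 * later (e.g. while copying to user space).
		 */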
1541 		return __skb_checksum_complete(skb);
1542 	}
1543 	return 0;
1544 }
1545 
1546 
1547 /* The socket must have its spinlock held when we get
1548  * here.
1549  *
1550  * We have a potential double-lock case here, so even when
1551  * doing backlog processing we use the BH locking scheme.
1552  * This is because we cannot sleep with the original spinlock
1553  * held.
1554  */
1555 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1556 {
1557 	struct sock *rsk;
1558 #ifdef CONFIG_TCP_MD5SIG
1559 	/*
1560 	 * We really want to reject the packet as early as possible
1561 	 * if:
1562 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1563 	 *  o There is an MD5 option and we're not expecting one
1564 	 */
1565 	if (tcp_v4_inbound_md5_hash(sk, skb))
1566 		goto discard;
1567 #endif
1568 
1569 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1570 		TCP_CHECK_TIMER(sk);
1571 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1572 			rsk = sk;
1573 			goto reset;
1574 		}
1575 		TCP_CHECK_TIMER(sk);
1576 		return 0;
1577 	}
1578 
1579 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1580 		goto csum_err;
1581 
1582 	if (sk->sk_state == TCP_LISTEN) {
1583 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1584 		if (!nsk)
1585 			goto discard;
1586 
1587 		if (nsk != sk) {
1588 			if (tcp_child_process(sk, nsk, skb)) {
1589 				rsk = nsk;
1590 				goto reset;
1591 			}
1592 			return 0;
1593 		}
1594 	}
1595 
1596 	TCP_CHECK_TIMER(sk);
1597 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1598 		rsk = sk;
1599 		goto reset;
1600 	}
1601 	TCP_CHECK_TIMER(sk);
1602 	return 0;
1603 
1604 reset:
1605 	tcp_v4_send_reset(rsk, skb);
1606 discard:
1607 	kfree_skb(skb);
1608 	/* Be careful here. If this function gets more complicated and
1609 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1610 	 * might be destroyed here. This current version compiles correctly,
1611 	 * but you have been warned.
1612 	 */
1613 	return 0;
1614 
1615 csum_err:
1616 	TCP_INC_STATS_BH(TCP_MIB_INERRS);
1617 	goto discard;
1618 }
1619 
1620 /*
1621  *	From tcp_input.c
1622  */
1623 
1624 int tcp_v4_rcv(struct sk_buff *skb)
1625 {
1626 	const struct iphdr *iph;
1627 	struct tcphdr *th;
1628 	struct sock *sk;
1629 	int ret;
1630 
1631 	if (skb->pkt_type != PACKET_HOST)
1632 		goto discard_it;
1633 
1634 	/* Count it even if it's bad */
1635 	TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1636 
1637 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1638 		goto discard_it;
1639 
1640 	th = tcp_hdr(skb);
1641 
1642 	if (th->doff < sizeof(struct tcphdr) / 4)
1643 		goto bad_packet;
1644 	if (!pskb_may_pull(skb, th->doff * 4))
1645 		goto discard_it;
1646 
1647 	/* An explanation is required here, I think.
1648 	 * Packet length and doff are validated by header prediction,
1649 	 * provided the case of th->doff==0 is eliminated.
1650 	 * So, we defer the checks. */
1651 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1652 		goto bad_packet;
1653 
1654 	th = tcp_hdr(skb);
1655 	iph = ip_hdr(skb);
1656 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1657 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1658 				    skb->len - th->doff * 4);
1659 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1660 	TCP_SKB_CB(skb)->when	 = 0;
1661 	TCP_SKB_CB(skb)->flags	 = iph->tos;
1662 	TCP_SKB_CB(skb)->sacked	 = 0;
1663 
1664 	sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
1665 			   iph->daddr, th->dest, inet_iif(skb));
1666 	if (!sk)
1667 		goto no_tcp_socket;
1668 
1669 process:
1670 	if (sk->sk_state == TCP_TIME_WAIT)
1671 		goto do_time_wait;
1672 
1673 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1674 		goto discard_and_relse;
1675 	nf_reset(skb);
1676 
1677 	if (sk_filter(sk, skb))
1678 		goto discard_and_relse;
1679 
1680 	skb->dev = NULL;
1681 
1682 	bh_lock_sock_nested(sk);
1683 	ret = 0;
1684 	if (!sock_owned_by_user(sk)) {
1685 #ifdef CONFIG_NET_DMA
1686 		struct tcp_sock *tp = tcp_sk(sk);
1687 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1688 			tp->ucopy.dma_chan = get_softnet_dma();
1689 		if (tp->ucopy.dma_chan)
1690 			ret = tcp_v4_do_rcv(sk, skb);
1691 		else
1692 #endif
1693 		{
1694 			if (!tcp_prequeue(sk, skb))
1695 				ret = tcp_v4_do_rcv(sk, skb);
1696 		}
1697 	} else
1698 		sk_add_backlog(sk, skb);
1699 	bh_unlock_sock(sk);
1700 
1701 	sock_put(sk);
1702 
1703 	return ret;
1704 
1705 no_tcp_socket:
1706 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1707 		goto discard_it;
1708 
1709 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1710 bad_packet:
1711 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1712 	} else {
1713 		tcp_v4_send_reset(NULL, skb);
1714 	}
1715 
1716 discard_it:
1717 	/* Discard frame. */
1718 	kfree_skb(skb);
1719 	return 0;
1720 
1721 discard_and_relse:
1722 	sock_put(sk);
1723 	goto discard_it;
1724 
1725 do_time_wait:
1726 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1727 		inet_twsk_put(inet_twsk(sk));
1728 		goto discard_it;
1729 	}
1730 
1731 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1732 		TCP_INC_STATS_BH(TCP_MIB_INERRS);
1733 		inet_twsk_put(inet_twsk(sk));
1734 		goto discard_it;
1735 	}
1736 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1737 	case TCP_TW_SYN: {
1738 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
1739 							iph->daddr, th->dest,
1740 							inet_iif(skb));
1741 		if (sk2) {
1742 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1743 			inet_twsk_put(inet_twsk(sk));
1744 			sk = sk2;
1745 			goto process;
1746 		}
1747 		/* Fall through to ACK */
1748 	}
1749 	case TCP_TW_ACK:
1750 		tcp_v4_timewait_ack(sk, skb);
1751 		break;
1752 	case TCP_TW_RST:
1753 		goto no_tcp_socket;
1754 	case TCP_TW_SUCCESS:;
1755 	}
1756 	goto discard_it;
1757 }
1758 
1759 /* VJ's idea. Save last timestamp seen from this destination
1760  * and hold it at least for normal timewait interval to use for duplicate
1761  * segment detection in subsequent connections, before they enter synchronized
1762  * state.
1763  */
1764 
1765 int tcp_v4_remember_stamp(struct sock *sk)
1766 {
1767 	struct inet_sock *inet = inet_sk(sk);
1768 	struct tcp_sock *tp = tcp_sk(sk);
1769 	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1770 	struct inet_peer *peer = NULL;
1771 	int release_it = 0;
1772 
1773 	if (!rt || rt->rt_dst != inet->daddr) {
1774 		peer = inet_getpeer(inet->daddr, 1);
1775 		release_it = 1;
1776 	} else {
1777 		if (!rt->peer)
1778 			rt_bind_peer(rt, 1);
1779 		peer = rt->peer;
1780 	}
1781 
1782 	if (peer) {
1783 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1784 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1785 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1786 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1787 			peer->tcp_ts = tp->rx_opt.ts_recent;
1788 		}
1789 		if (release_it)
1790 			inet_putpeer(peer);
1791 		return 1;
1792 	}
1793 
1794 	return 0;
1795 }
1796 
1797 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1798 {
1799 	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1800 
1801 	if (peer) {
1802 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1803 
1804 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1805 		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1806 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1807 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1808 			peer->tcp_ts	   = tcptw->tw_ts_recent;
1809 		}
1810 		inet_putpeer(peer);
1811 		return 1;
1812 	}
1813 
1814 	return 0;
1815 }
1816 
1817 struct inet_connection_sock_af_ops ipv4_specific = {
1818 	.queue_xmit	   = ip_queue_xmit,
1819 	.send_check	   = tcp_v4_send_check,
1820 	.rebuild_header	   = inet_sk_rebuild_header,
1821 	.conn_request	   = tcp_v4_conn_request,
1822 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1823 	.remember_stamp	   = tcp_v4_remember_stamp,
1824 	.net_header_len	   = sizeof(struct iphdr),
1825 	.setsockopt	   = ip_setsockopt,
1826 	.getsockopt	   = ip_getsockopt,
1827 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1828 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1829 #ifdef CONFIG_COMPAT
1830 	.compat_setsockopt = compat_ip_setsockopt,
1831 	.compat_getsockopt = compat_ip_getsockopt,
1832 #endif
1833 };
1834 
1835 #ifdef CONFIG_TCP_MD5SIG
1836 static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1837 	.md5_lookup		= tcp_v4_md5_lookup,
1838 	.calc_md5_hash		= tcp_v4_calc_md5_hash,
1839 	.md5_add		= tcp_v4_md5_add_func,
1840 	.md5_parse		= tcp_v4_parse_md5_keys,
1841 };
1842 #endif
1843 
1844 /* NOTE: A lot of things set to zero explicitly by call to
1845  *       sk_alloc() so need not be done here.
1846  */
1847 static int tcp_v4_init_sock(struct sock *sk)
1848 {
1849 	struct inet_connection_sock *icsk = inet_csk(sk);
1850 	struct tcp_sock *tp = tcp_sk(sk);
1851 
1852 	skb_queue_head_init(&tp->out_of_order_queue);
1853 	tcp_init_xmit_timers(sk);
1854 	tcp_prequeue_init(tp);
1855 
1856 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1857 	tp->mdev = TCP_TIMEOUT_INIT;
1858 
1859 	/* So many TCP implementations out there (incorrectly) count the
1860 	 * initial SYN frame in their delayed-ACK and congestion control
1861 	 * algorithms that we must have the following bandaid to talk
1862 	 * efficiently to them.  -DaveM
1863 	 */
1864 	tp->snd_cwnd = 2;
1865 
1866 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1867 	 * initialization of these values.
1868 	 */
1869 	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
1870 	tp->snd_cwnd_clamp = ~0;
1871 	tp->mss_cache = 536;
1872 
1873 	tp->reordering = sysctl_tcp_reordering;
1874 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1875 
1876 	sk->sk_state = TCP_CLOSE;
1877 
1878 	sk->sk_write_space = sk_stream_write_space;
1879 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1880 
1881 	icsk->icsk_af_ops = &ipv4_specific;
1882 	icsk->icsk_sync_mss = tcp_sync_mss;
1883 #ifdef CONFIG_TCP_MD5SIG
1884 	tp->af_specific = &tcp_sock_ipv4_specific;
1885 #endif
1886 
1887 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1888 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1889 
1890 	atomic_inc(&tcp_sockets_allocated);
1891 
1892 	return 0;
1893 }
1894 
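/*
 * Undo tcp_v4_init_sock() and release everything the connection
 * accumulated: timers, congestion control state, queued skbs, MD5 keys,
 * the bound port and the cached sendmsg page.
 */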
1895 int tcp_v4_destroy_sock(struct sock *sk)
1896 {
1897 	struct tcp_sock *tp = tcp_sk(sk);
1898 
1899 	tcp_clear_xmit_timers(sk);
1900 
1901 	tcp_cleanup_congestion_control(sk);
1902 
1903 	/* Clean up the write buffer. */
1904 	tcp_write_queue_purge(sk);
1905 
1906 	/* Clean up our (hopefully empty) out_of_order_queue. */
1907 	__skb_queue_purge(&tp->out_of_order_queue);
1908 
1909 #ifdef CONFIG_TCP_MD5SIG
1910 	/* Clean up the MD5 key list, if any */
1911 	if (tp->md5sig_info) {
1912 		tcp_v4_clear_md5_list(sk);
1913 		kfree(tp->md5sig_info);
1914 		tp->md5sig_info = NULL;
1915 	}
1916 #endif
1917 
1918 #ifdef CONFIG_NET_DMA
1919 	/* Clean up our sk_async_wait_queue */
1920 	__skb_queue_purge(&sk->sk_async_wait_queue);
1921 #endif
1922 
1923 	/* Clean up the prequeue; it really should be empty by now. */
1924 	__skb_queue_purge(&tp->ucopy.prequeue);
1925 
1926 	/* Clean up a referenced TCP bind bucket. */
1927 	if (inet_csk(sk)->icsk_bind_hash)
1928 		inet_put_port(&tcp_hashinfo, sk);
1929 
1930 	/*
1931 	 * If a cached sendmsg page exists, toss it.
1932 	 */
1933 	if (sk->sk_sndmsg_page) {
1934 		__free_page(sk->sk_sndmsg_page);
1935 		sk->sk_sndmsg_page = NULL;
1936 	}
1937 
1938 	atomic_dec(&tcp_sockets_allocated);
1939 
1940 	return 0;
1941 }
1942 
1943 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1944 
1945 #ifdef CONFIG_PROC_FS
1946 /* Proc filesystem TCP sock list dumping. */
1947 
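/*
 * The iterators below walk the listening hash (including each listener's
 * SYN queue) first and then the established hash, which also holds the
 * TIME_WAIT chains.  st->state records which phase the walk is in so
 * that tcp_seq_stop() can drop the right lock.
 */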
1948 static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
1949 {
1950 	return hlist_empty(head) ? NULL :
1951 		hlist_entry(head->first, struct inet_timewait_sock, tw_node);
1952 }
1953 
1954 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1955 {
1956 	return tw->tw_node.next ?
1957 		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1958 }
1959 
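/*
 * Advance the listening-hash walk.  Each listener's pending open
 * requests (SYN_RECV entries) are reported in between the listening
 * sockets themselves; st->state tracks whether we are inside a
 * listener's syn_table or walking the hash chain.
 */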
1960 static void *listening_get_next(struct seq_file *seq, void *cur)
1961 {
1962 	struct inet_connection_sock *icsk;
1963 	struct hlist_node *node;
1964 	struct sock *sk = cur;
1965 	struct tcp_iter_state *st = seq->private;
1966 
1967 	if (!sk) {
1968 		st->bucket = 0;
1969 		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
1970 		goto get_sk;
1971 	}
1972 
1973 	++st->num;
1974 
1975 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1976 		struct request_sock *req = cur;
1977 
1978 		icsk = inet_csk(st->syn_wait_sk);
1979 		req = req->dl_next;
1980 		while (1) {
1981 			while (req) {
1982 				if (req->rsk_ops->family == st->family) {
1983 					cur = req;
1984 					goto out;
1985 				}
1986 				req = req->dl_next;
1987 			}
1988 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1989 				break;
1990 get_req:
1991 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1992 		}
1993 		sk	  = sk_next(st->syn_wait_sk);
1994 		st->state = TCP_SEQ_STATE_LISTENING;
1995 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1996 	} else {
1997 		icsk = inet_csk(sk);
1998 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1999 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2000 			goto start_req;
2001 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2002 		sk = sk_next(sk);
2003 	}
2004 get_sk:
2005 	sk_for_each_from(sk, node) {
2006 		if (sk->sk_family == st->family) {
2007 			cur = sk;
2008 			goto out;
2009 		}
2010 		icsk = inet_csk(sk);
2011 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2012 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2013 start_req:
2014 			st->uid		= sock_i_uid(sk);
2015 			st->syn_wait_sk = sk;
2016 			st->state	= TCP_SEQ_STATE_OPENREQ;
2017 			st->sbucket	= 0;
2018 			goto get_req;
2019 		}
2020 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2021 	}
2022 	if (++st->bucket < INET_LHTABLE_SIZE) {
2023 		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
2024 		goto get_sk;
2025 	}
2026 	cur = NULL;
2027 out:
2028 	return cur;
2029 }
2030 
2031 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2032 {
2033 	void *rc = listening_get_next(seq, NULL);
2034 
2035 	while (rc && *pos) {
2036 		rc = listening_get_next(seq, rc);
2037 		--*pos;
2038 	}
2039 	return rc;
2040 }
2041 
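/*
 * Find the first socket of the requested family in the established
 * hash, checking each bucket's ESTABLISHED chain before its TIME_WAIT
 * chain.  When a match is found the bucket's lock is left held; it is
 * dropped in established_get_next() or tcp_seq_stop().
 */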
2042 static void *established_get_first(struct seq_file *seq)
2043 {
2044 	struct tcp_iter_state *st = seq->private;
2045 	void *rc = NULL;
2046 
2047 	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
2048 		struct sock *sk;
2049 		struct hlist_node *node;
2050 		struct inet_timewait_sock *tw;
2051 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2052 
2053 		read_lock_bh(lock);
2054 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2055 			if (sk->sk_family != st->family) {
2056 				continue;
2057 			}
2058 			rc = sk;
2059 			goto out;
2060 		}
2061 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2062 		inet_twsk_for_each(tw, node,
2063 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2064 			if (tw->tw_family != st->family) {
2065 				continue;
2066 			}
2067 			rc = tw;
2068 			goto out;
2069 		}
2070 		read_unlock_bh(lock);
2071 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2072 	}
2073 out:
2074 	return rc;
2075 }
2076 
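/*
 * Advance the established-hash walk, crossing from a bucket's
 * ESTABLISHED chain to its TIME_WAIT chain and then on to the next
 * bucket, taking and releasing the per-bucket lock as it goes.
 */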
2077 static void *established_get_next(struct seq_file *seq, void *cur)
2078 {
2079 	struct sock *sk = cur;
2080 	struct inet_timewait_sock *tw;
2081 	struct hlist_node *node;
2082 	struct tcp_iter_state *st = seq->private;
2083 
2084 	++st->num;
2085 
2086 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2087 		tw = cur;
2088 		tw = tw_next(tw);
2089 get_tw:
2090 		while (tw && tw->tw_family != st->family) {
2091 			tw = tw_next(tw);
2092 		}
2093 		if (tw) {
2094 			cur = tw;
2095 			goto out;
2096 		}
2097 		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2098 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2099 
2100 		if (++st->bucket < tcp_hashinfo.ehash_size) {
2101 			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2102 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
2103 		} else {
2104 			cur = NULL;
2105 			goto out;
2106 		}
2107 	} else
2108 		sk = sk_next(sk);
2109 
2110 	sk_for_each_from(sk, node) {
2111 		if (sk->sk_family == st->family)
2112 			goto found;
2113 	}
2114 
2115 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2116 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2117 	goto get_tw;
2118 found:
2119 	cur = sk;
2120 out:
2121 	return cur;
2122 }
2123 
2124 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2125 {
2126 	void *rc = established_get_first(seq);
2127 
2128 	while (rc && pos) {
2129 		rc = established_get_next(seq, rc);
2130 		--pos;
2131 	}
2132 	return rc;
2133 }
2134 
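/*
 * Position the walk at entry number 'pos', counting listening sockets
 * (and their open requests) before established and TIME_WAIT sockets.
 */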
2135 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2136 {
2137 	void *rc;
2138 	struct tcp_iter_state *st = seq->private;
2139 
2140 	inet_listen_lock(&tcp_hashinfo);
2141 	st->state = TCP_SEQ_STATE_LISTENING;
2142 	rc	  = listening_get_idx(seq, &pos);
2143 
2144 	if (!rc) {
2145 		inet_listen_unlock(&tcp_hashinfo);
2146 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2147 		rc	  = established_get_idx(seq, pos);
2148 	}
2149 
2150 	return rc;
2151 }
2152 
2153 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2154 {
2155 	struct tcp_iter_state *st = seq->private;
2156 	st->state = TCP_SEQ_STATE_LISTENING;
2157 	st->num = 0;
2158 	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2159 }
2160 
2161 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2162 {
2163 	void *rc = NULL;
2164 	struct tcp_iter_state *st;
2165 
2166 	if (v == SEQ_START_TOKEN) {
2167 		rc = tcp_get_idx(seq, 0);
2168 		goto out;
2169 	}
2170 	st = seq->private;
2171 
2172 	switch (st->state) {
2173 	case TCP_SEQ_STATE_OPENREQ:
2174 	case TCP_SEQ_STATE_LISTENING:
2175 		rc = listening_get_next(seq, v);
2176 		if (!rc) {
2177 			inet_listen_unlock(&tcp_hashinfo);
2178 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2179 			rc	  = established_get_first(seq);
2180 		}
2181 		break;
2182 	case TCP_SEQ_STATE_ESTABLISHED:
2183 	case TCP_SEQ_STATE_TIME_WAIT:
2184 		rc = established_get_next(seq, v);
2185 		break;
2186 	}
2187 out:
2188 	++*pos;
2189 	return rc;
2190 }
2191 
2192 static void tcp_seq_stop(struct seq_file *seq, void *v)
2193 {
2194 	struct tcp_iter_state *st = seq->private;
2195 
2196 	switch (st->state) {
2197 	case TCP_SEQ_STATE_OPENREQ:
2198 		if (v) {
2199 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2200 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2201 		}
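		/* fall through - the listen lock is still held and must be dropped too */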
2202 	case TCP_SEQ_STATE_LISTENING:
2203 		if (v != SEQ_START_TOKEN)
2204 			inet_listen_unlock(&tcp_hashinfo);
2205 		break;
2206 	case TCP_SEQ_STATE_TIME_WAIT:
2207 	case TCP_SEQ_STATE_ESTABLISHED:
2208 		if (v)
2209 			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2210 		break;
2211 	}
2212 }
2213 
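/*
 * ->open() for the /proc/net entries registered in tcp_proc_register():
 * allocate the per-file iterator state and wire up its seq_file
 * operations.
 */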
2214 static int tcp_seq_open(struct inode *inode, struct file *file)
2215 {
2216 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2217 	struct seq_file *seq;
2218 	struct tcp_iter_state *s;
2219 	int rc;
2220 
2221 	if (unlikely(afinfo == NULL))
2222 		return -EINVAL;
2223 
2224 	s = kzalloc(sizeof(*s), GFP_KERNEL);
2225 	if (!s)
2226 		return -ENOMEM;
2227 	s->family		= afinfo->family;
2228 	s->seq_ops.start	= tcp_seq_start;
2229 	s->seq_ops.next		= tcp_seq_next;
2230 	s->seq_ops.show		= afinfo->seq_show;
2231 	s->seq_ops.stop		= tcp_seq_stop;
2232 
2233 	rc = seq_open(file, &s->seq_ops);
2234 	if (rc)
2235 		goto out_kfree;
2236 	seq	     = file->private_data;
2237 	seq->private = s;
2238 out:
2239 	return rc;
2240 out_kfree:
2241 	kfree(s);
2242 	goto out;
2243 }
2244 
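/*
 * Create the af-specific /proc/net entry (e.g. "tcp" for AF_INET,
 * registered in tcp4_proc_init() below) and point it at the caller's
 * tcp_seq_afinfo.
 */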
2245 int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2246 {
2247 	int rc = 0;
2248 	struct proc_dir_entry *p;
2249 
2250 	if (!afinfo)
2251 		return -EINVAL;
2252 	afinfo->seq_fops->owner		= afinfo->owner;
2253 	afinfo->seq_fops->open		= tcp_seq_open;
2254 	afinfo->seq_fops->read		= seq_read;
2255 	afinfo->seq_fops->llseek	= seq_lseek;
2256 	afinfo->seq_fops->release	= seq_release_private;
2257 
2258 	p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
2259 	if (p)
2260 		p->data = afinfo;
2261 	else
2262 		rc = -ENOMEM;
2263 	return rc;
2264 }
2265 
2266 void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2267 {
2268 	if (!afinfo)
2269 		return;
2270 	proc_net_remove(&init_net, afinfo->name);
2271 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2272 }
2273 
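/*
 * Formatting helpers for tcp4_seq_show(): each produces one
 * /proc/net/tcp line, for an open request, an established or listening
 * socket, or a TIME_WAIT socket respectively.
 */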
2274 static void get_openreq4(struct sock *sk, struct request_sock *req,
2275 			 char *tmpbuf, int i, int uid)
2276 {
2277 	const struct inet_request_sock *ireq = inet_rsk(req);
2278 	int ttd = req->expires - jiffies;
2279 
2280 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2281 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2282 		i,
2283 		ireq->loc_addr,
2284 		ntohs(inet_sk(sk)->sport),
2285 		ireq->rmt_addr,
2286 		ntohs(ireq->rmt_port),
2287 		TCP_SYN_RECV,
2288 		0, 0, /* could print option size, but that is af dependent. */
2289 		1,    /* timers active (only the expire timer) */
2290 		jiffies_to_clock_t(ttd),
2291 		req->retrans,
2292 		uid,
2293 		0,  /* non standard timer */
2294 		0, /* open_requests have no inode */
2295 		atomic_read(&sk->sk_refcnt),
2296 		req);
2297 }
2298 
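/*
 * The "tr" column of the output: 1 retransmit timer pending, 2 another
 * timer (e.g. keepalive) pending on sk_timer, 4 zero window probe timer
 * pending, 0 nothing pending.
 */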
2299 static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
2300 {
2301 	int timer_active;
2302 	unsigned long timer_expires;
2303 	struct tcp_sock *tp = tcp_sk(sk);
2304 	const struct inet_connection_sock *icsk = inet_csk(sk);
2305 	struct inet_sock *inet = inet_sk(sk);
2306 	__be32 dest = inet->daddr;
2307 	__be32 src = inet->rcv_saddr;
2308 	__u16 destp = ntohs(inet->dport);
2309 	__u16 srcp = ntohs(inet->sport);
2310 
2311 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2312 		timer_active	= 1;
2313 		timer_expires	= icsk->icsk_timeout;
2314 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2315 		timer_active	= 4;
2316 		timer_expires	= icsk->icsk_timeout;
2317 	} else if (timer_pending(&sk->sk_timer)) {
2318 		timer_active	= 2;
2319 		timer_expires	= sk->sk_timer.expires;
2320 	} else {
2321 		timer_active	= 0;
2322 		timer_expires	= jiffies;
2323 	}
2324 
2325 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2326 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
2327 		i, src, srcp, dest, destp, sk->sk_state,
2328 		tp->write_seq - tp->snd_una,
2329 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
2330 					     (tp->rcv_nxt - tp->copied_seq),
2331 		timer_active,
2332 		jiffies_to_clock_t(timer_expires - jiffies),
2333 		icsk->icsk_retransmits,
2334 		sock_i_uid(sk),
2335 		icsk->icsk_probes_out,
2336 		sock_i_ino(sk),
2337 		atomic_read(&sk->sk_refcnt), sk,
2338 		icsk->icsk_rto,
2339 		icsk->icsk_ack.ato,
2340 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2341 		tp->snd_cwnd,
2342 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2343 }
2344 
2345 static void get_timewait4_sock(struct inet_timewait_sock *tw,
2346 			       char *tmpbuf, int i)
2347 {
2348 	__be32 dest, src;
2349 	__u16 destp, srcp;
2350 	int ttd = tw->tw_ttd - jiffies;
2351 
2352 	if (ttd < 0)
2353 		ttd = 0;
2354 
2355 	dest  = tw->tw_daddr;
2356 	src   = tw->tw_rcv_saddr;
2357 	destp = ntohs(tw->tw_dport);
2358 	srcp  = ntohs(tw->tw_sport);
2359 
2360 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2361 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2362 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2363 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2364 		atomic_read(&tw->tw_refcnt), tw);
2365 }
2366 
2367 #define TMPSZ 150
2368 
2369 static int tcp4_seq_show(struct seq_file *seq, void *v)
2370 {
2371 	struct tcp_iter_state *st;
2372 	char tmpbuf[TMPSZ + 1];
2373 
2374 	if (v == SEQ_START_TOKEN) {
2375 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2376 			   "  sl  local_address rem_address   st tx_queue "
2377 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2378 			   "inode");
2379 		goto out;
2380 	}
2381 	st = seq->private;
2382 
2383 	switch (st->state) {
2384 	case TCP_SEQ_STATE_LISTENING:
2385 	case TCP_SEQ_STATE_ESTABLISHED:
2386 		get_tcp4_sock(v, tmpbuf, st->num);
2387 		break;
2388 	case TCP_SEQ_STATE_OPENREQ:
2389 		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2390 		break;
2391 	case TCP_SEQ_STATE_TIME_WAIT:
2392 		get_timewait4_sock(v, tmpbuf, st->num);
2393 		break;
2394 	}
2395 	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2396 out:
2397 	return 0;
2398 }
2399 
2400 static struct file_operations tcp4_seq_fops;
2401 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2402 	.owner		= THIS_MODULE,
2403 	.name		= "tcp",
2404 	.family		= AF_INET,
2405 	.seq_show	= tcp4_seq_show,
2406 	.seq_fops	= &tcp4_seq_fops,
2407 };
2408 
2409 int __init tcp4_proc_init(void)
2410 {
2411 	return tcp_proc_register(&tcp4_seq_afinfo);
2412 }
2413 
2414 void tcp4_proc_exit(void)
2415 {
2416 	tcp_proc_unregister(&tcp4_seq_afinfo);
2417 }
2418 #endif /* CONFIG_PROC_FS */
2419 
2420 DEFINE_PROTO_INUSE(tcp)
2421 
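/*
 * Protocol operations for IPv4 SOCK_STREAM sockets; inet_init()
 * registers this as the handler for IPPROTO_TCP.
 */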
2422 struct proto tcp_prot = {
2423 	.name			= "TCP",
2424 	.owner			= THIS_MODULE,
2425 	.close			= tcp_close,
2426 	.connect		= tcp_v4_connect,
2427 	.disconnect		= tcp_disconnect,
2428 	.accept			= inet_csk_accept,
2429 	.ioctl			= tcp_ioctl,
2430 	.init			= tcp_v4_init_sock,
2431 	.destroy		= tcp_v4_destroy_sock,
2432 	.shutdown		= tcp_shutdown,
2433 	.setsockopt		= tcp_setsockopt,
2434 	.getsockopt		= tcp_getsockopt,
2435 	.recvmsg		= tcp_recvmsg,
2436 	.backlog_rcv		= tcp_v4_do_rcv,
2437 	.hash			= tcp_v4_hash,
2438 	.unhash			= tcp_unhash,
2439 	.get_port		= tcp_v4_get_port,
2440 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2441 	.sockets_allocated	= &tcp_sockets_allocated,
2442 	.orphan_count		= &tcp_orphan_count,
2443 	.memory_allocated	= &tcp_memory_allocated,
2444 	.memory_pressure	= &tcp_memory_pressure,
2445 	.sysctl_mem		= sysctl_tcp_mem,
2446 	.sysctl_wmem		= sysctl_tcp_wmem,
2447 	.sysctl_rmem		= sysctl_tcp_rmem,
2448 	.max_header		= MAX_TCP_HEADER,
2449 	.obj_size		= sizeof(struct tcp_sock),
2450 	.twsk_prot		= &tcp_timewait_sock_ops,
2451 	.rsk_prot		= &tcp_request_sock_ops,
2452 #ifdef CONFIG_COMPAT
2453 	.compat_setsockopt	= compat_tcp_setsockopt,
2454 	.compat_getsockopt	= compat_tcp_getsockopt,
2455 #endif
2456 	REF_PROTO_INUSE(tcp)
2457 };
2458 
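/*
 * Boot-time setup; the control socket created here is required for TCP
 * to operate at all, hence the panic() on failure.
 */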
2459 void __init tcp_v4_init(struct net_proto_family *ops)
2460 {
2461 	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
2462 				     IPPROTO_TCP) < 0)
2463 		panic("Failed to create the TCP control socket.\n");
2464 }
2465 
2466 EXPORT_SYMBOL(ipv4_specific);
2467 EXPORT_SYMBOL(tcp_hashinfo);
2468 EXPORT_SYMBOL(tcp_prot);
2469 EXPORT_SYMBOL(tcp_unhash);
2470 EXPORT_SYMBOL(tcp_v4_conn_request);
2471 EXPORT_SYMBOL(tcp_v4_connect);
2472 EXPORT_SYMBOL(tcp_v4_do_rcv);
2473 EXPORT_SYMBOL(tcp_v4_remember_stamp);
2474 EXPORT_SYMBOL(tcp_v4_send_check);
2475 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2476 
2477 #ifdef CONFIG_PROC_FS
2478 EXPORT_SYMBOL(tcp_proc_register);
2479 EXPORT_SYMBOL(tcp_proc_unregister);
2480 #endif
2481 EXPORT_SYMBOL(sysctl_tcp_low_latency);
2482 
2483