xref: /linux/net/ipv4/tcp_ipv4.c (revision f2ee442115c9b6219083c019939a9cc0c9abb2f8)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					to a single port at the same time.
51  */
52 
53 
54 #include <linux/bottom_half.h>
55 #include <linux/types.h>
56 #include <linux/fcntl.h>
57 #include <linux/module.h>
58 #include <linux/random.h>
59 #include <linux/cache.h>
60 #include <linux/jhash.h>
61 #include <linux/init.h>
62 #include <linux/times.h>
63 #include <linux/slab.h>
64 
65 #include <net/net_namespace.h>
66 #include <net/icmp.h>
67 #include <net/inet_hashtables.h>
68 #include <net/tcp.h>
69 #include <net/transp_v6.h>
70 #include <net/ipv6.h>
71 #include <net/inet_common.h>
72 #include <net/timewait_sock.h>
73 #include <net/xfrm.h>
74 #include <net/netdma.h>
75 #include <net/secure_seq.h>
76 
77 #include <linux/inet.h>
78 #include <linux/ipv6.h>
79 #include <linux/stddef.h>
80 #include <linux/proc_fs.h>
81 #include <linux/seq_file.h>
82 
83 #include <linux/crypto.h>
84 #include <linux/scatterlist.h>
85 
86 int sysctl_tcp_tw_reuse __read_mostly;
87 int sysctl_tcp_low_latency __read_mostly;
88 EXPORT_SYMBOL(sysctl_tcp_low_latency);
89 
90 
91 #ifdef CONFIG_TCP_MD5SIG
92 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
93 						   __be32 addr);
94 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
95 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96 #else
97 static inline
98 struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
99 {
100 	return NULL;
101 }
102 #endif
103 
104 struct inet_hashinfo tcp_hashinfo;
105 EXPORT_SYMBOL(tcp_hashinfo);
106 
107 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
108 {
109 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
110 					  ip_hdr(skb)->saddr,
111 					  tcp_hdr(skb)->dest,
112 					  tcp_hdr(skb)->source);
113 }
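/*
 * Informational note (added, not part of the original source): the initial
 * sequence number above is derived from the connection four-tuple mixed with
 * a boot-time secret plus a clock-based offset, so ISNs are hard to predict
 * yet still advance over time for a repeated four-tuple (in the spirit of
 * RFC 1948).
 */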
114 
115 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
116 {
117 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
118 	struct tcp_sock *tp = tcp_sk(sk);
119 
120 	/* With PAWS, it is safe from the viewpoint
121 	   of data integrity. Even without PAWS it is safe provided the
122 	   sequence spaces do not overlap, i.e. at data rates <= 80Mbit/sec.
123 
124 	   Actually, the idea is close to VJ's: the timestamp cache is held
125 	   not per host but per port pair, and the TW bucket is used as the
126 	   state holder.
127 
128 	   If the TW bucket has already been destroyed, we fall back to VJ's
129 	   scheme and use the initial timestamp retrieved from the peer table.
130 	 */
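	/* Worked example (illustrative, not part of the original comment):
	 * with sysctl_tcp_tw_reuse enabled and a TIME-WAIT bucket whose
	 * recorded timestamp is more than one second old, the check below
	 * lets the new connection reuse the four-tuple; write_seq starts at
	 * tw_snd_nxt + 65535 + 2 so the new sequence space cannot collide
	 * with in-flight data from the old incarnation.
	 */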
131 	if (tcptw->tw_ts_recent_stamp &&
132 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
133 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
134 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
135 		if (tp->write_seq == 0)
136 			tp->write_seq = 1;
137 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
138 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
139 		sock_hold(sktw);
140 		return 1;
141 	}
142 
143 	return 0;
144 }
145 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146 
147 /* This will initiate an outgoing connection. */
148 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
149 {
150 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
151 	struct inet_sock *inet = inet_sk(sk);
152 	struct tcp_sock *tp = tcp_sk(sk);
153 	__be16 orig_sport, orig_dport;
154 	__be32 daddr, nexthop;
155 	struct flowi4 *fl4;
156 	struct rtable *rt;
157 	int err;
158 	struct ip_options_rcu *inet_opt;
159 
160 	if (addr_len < sizeof(struct sockaddr_in))
161 		return -EINVAL;
162 
163 	if (usin->sin_family != AF_INET)
164 		return -EAFNOSUPPORT;
165 
166 	nexthop = daddr = usin->sin_addr.s_addr;
167 	inet_opt = rcu_dereference_protected(inet->inet_opt,
168 					     sock_owned_by_user(sk));
169 	if (inet_opt && inet_opt->opt.srr) {
170 		if (!daddr)
171 			return -EINVAL;
172 		nexthop = inet_opt->opt.faddr;
173 	}
174 
175 	orig_sport = inet->inet_sport;
176 	orig_dport = usin->sin_port;
177 	fl4 = &inet->cork.fl.u.ip4;
178 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
179 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
180 			      IPPROTO_TCP,
181 			      orig_sport, orig_dport, sk, true);
182 	if (IS_ERR(rt)) {
183 		err = PTR_ERR(rt);
184 		if (err == -ENETUNREACH)
185 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
186 		return err;
187 	}
188 
189 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
190 		ip_rt_put(rt);
191 		return -ENETUNREACH;
192 	}
193 
194 	if (!inet_opt || !inet_opt->opt.srr)
195 		daddr = fl4->daddr;
196 
197 	if (!inet->inet_saddr)
198 		inet->inet_saddr = fl4->saddr;
199 	inet->inet_rcv_saddr = inet->inet_saddr;
200 
201 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
202 		/* Reset inherited state */
203 		tp->rx_opt.ts_recent	   = 0;
204 		tp->rx_opt.ts_recent_stamp = 0;
205 		tp->write_seq		   = 0;
206 	}
207 
208 	if (tcp_death_row.sysctl_tw_recycle &&
209 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
210 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
211 		/*
212 		 * VJ's idea. We save the last timestamp seen from
213 		 * the destination in the peer table when entering
214 		 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
215 		 * when trying a new connection.
216 		 */
217 		if (peer) {
218 			inet_peer_refcheck(peer);
219 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
220 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
221 				tp->rx_opt.ts_recent = peer->tcp_ts;
222 			}
223 		}
224 	}
225 
226 	inet->inet_dport = usin->sin_port;
227 	inet->inet_daddr = daddr;
228 
229 	inet_csk(sk)->icsk_ext_hdr_len = 0;
230 	if (inet_opt)
231 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
232 
233 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
234 
235 	/* Socket identity is still unknown (sport may be zero).
236 	 * However, we set the state to SYN-SENT and, without releasing the
237 	 * socket lock, select a source port, enter ourselves into the hash
238 	 * tables and complete initialization after this.
239 	 */
240 	tcp_set_state(sk, TCP_SYN_SENT);
241 	err = inet_hash_connect(&tcp_death_row, sk);
242 	if (err)
243 		goto failure;
244 
245 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
246 			       inet->inet_sport, inet->inet_dport, sk);
247 	if (IS_ERR(rt)) {
248 		err = PTR_ERR(rt);
249 		rt = NULL;
250 		goto failure;
251 	}
252 	/* OK, now commit destination to socket.  */
253 	sk->sk_gso_type = SKB_GSO_TCPV4;
254 	sk_setup_caps(sk, &rt->dst);
255 
256 	if (!tp->write_seq)
257 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
258 							   inet->inet_daddr,
259 							   inet->inet_sport,
260 							   usin->sin_port);
261 
262 	inet->inet_id = tp->write_seq ^ jiffies;
263 
264 	err = tcp_connect(sk);
265 	rt = NULL;
266 	if (err)
267 		goto failure;
268 
269 	return 0;
270 
271 failure:
272 	/*
273 	 * This unhashes the socket and releases the local port,
274 	 * if necessary.
275 	 */
276 	tcp_set_state(sk, TCP_CLOSE);
277 	ip_rt_put(rt);
278 	sk->sk_route_caps = 0;
279 	inet->inet_dport = 0;
280 	return err;
281 }
282 EXPORT_SYMBOL(tcp_v4_connect);
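/*
 * Usage sketch (illustrative, userspace side): a blocking connect() on an
 * AF_INET stream socket is what ultimately reaches tcp_v4_connect() above
 * via the protocol's ->connect handler.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(80),
 *	};
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */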
283 
284 /*
285  * This routine does path mtu discovery as defined in RFC1191.
286  */
287 static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
288 {
289 	struct dst_entry *dst;
290 	struct inet_sock *inet = inet_sk(sk);
291 
292 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
293 	 * sent out by Linux are always < 576 bytes, so they should go through
294 	 * unfragmented).
295 	 */
296 	if (sk->sk_state == TCP_LISTEN)
297 		return;
298 
299 	/* We don't check in the dst entry whether pmtu discovery is forbidden
300 	 * on this route. We just assume that no packet-too-big packets
301 	 * are sent back when pmtu discovery is not active.
302 	 * There is a small race when the user changes this flag in the
303 	 * route, but I think that's acceptable.
304 	 */
305 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
306 		return;
307 
308 	dst->ops->update_pmtu(dst, mtu);
309 
310 	/* Something is about to go wrong... Remember the soft error
311 	 * in case this connection is not able to recover.
312 	 */
313 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
314 		sk->sk_err_soft = EMSGSIZE;
315 
316 	mtu = dst_mtu(dst);
317 
318 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
319 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
320 		tcp_sync_mss(sk, mtu);
321 
322 		/* Resend the TCP packet because it's
323 		 * clear that the old packet has been
324 		 * dropped. This is the new "fast" path mtu
325 		 * discovery.
326 		 */
327 		tcp_simple_retransmit(sk);
328 	} /* else let the usual retransmit timer handle it */
329 }
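/*
 * Example flow (illustrative): an ICMP_FRAG_NEEDED quoting mtu=1400 on a path
 * that previously used 1500 makes tcp_sync_mss() shrink the MSS to roughly
 * 1400 minus the IP and TCP header overhead, and tcp_simple_retransmit()
 * resends the oversized segments without waiting for the retransmit timer.
 */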
330 
331 /*
332  * This routine is called by the ICMP module when it gets some
333  * sort of error condition.  If err < 0 then the socket should
334  * be closed and the error returned to the user.  If err > 0
335  * it's just the ICMP type << 8 | ICMP code.  After adjustment,
336  * the header points to the first 8 bytes of the TCP header.  We need
337  * to find the appropriate port.
338  *
339  * The locking strategy used here is very "optimistic". When
340  * someone else accesses the socket, the ICMP is just dropped,
341  * and for some paths there is no check at all.
342  * A more general error queue to queue errors for later handling
343  * is probably better.
344  *
345  */
346 
347 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
348 {
349 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
350 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
351 	struct inet_connection_sock *icsk;
352 	struct tcp_sock *tp;
353 	struct inet_sock *inet;
354 	const int type = icmp_hdr(icmp_skb)->type;
355 	const int code = icmp_hdr(icmp_skb)->code;
356 	struct sock *sk;
357 	struct sk_buff *skb;
358 	__u32 seq;
359 	__u32 remaining;
360 	int err;
361 	struct net *net = dev_net(icmp_skb->dev);
362 
363 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
364 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
365 		return;
366 	}
367 
368 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
369 			iph->saddr, th->source, inet_iif(icmp_skb));
370 	if (!sk) {
371 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
372 		return;
373 	}
374 	if (sk->sk_state == TCP_TIME_WAIT) {
375 		inet_twsk_put(inet_twsk(sk));
376 		return;
377 	}
378 
379 	bh_lock_sock(sk);
380 	/* If too many ICMPs get dropped on busy
381 	 * servers this needs to be solved differently.
382 	 */
383 	if (sock_owned_by_user(sk))
384 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
385 
386 	if (sk->sk_state == TCP_CLOSE)
387 		goto out;
388 
389 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
390 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
391 		goto out;
392 	}
393 
394 	icsk = inet_csk(sk);
395 	tp = tcp_sk(sk);
396 	seq = ntohl(th->seq);
397 	if (sk->sk_state != TCP_LISTEN &&
398 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
399 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
400 		goto out;
401 	}
402 
403 	switch (type) {
404 	case ICMP_SOURCE_QUENCH:
405 		/* Just silently ignore these. */
406 		goto out;
407 	case ICMP_PARAMETERPROB:
408 		err = EPROTO;
409 		break;
410 	case ICMP_DEST_UNREACH:
411 		if (code > NR_ICMP_UNREACH)
412 			goto out;
413 
414 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
415 			if (!sock_owned_by_user(sk))
416 				do_pmtu_discovery(sk, iph, info);
417 			goto out;
418 		}
419 
420 		err = icmp_err_convert[code].errno;
421 		/* check if icmp_skb allows revert of backoff
422 		 * (see draft-zimmermann-tcp-lcd) */
423 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
424 			break;
425 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
426 		    !icsk->icsk_backoff)
427 			break;
428 
429 		if (sock_owned_by_user(sk))
430 			break;
431 
432 		icsk->icsk_backoff--;
433 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
434 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
435 		tcp_bound_rto(sk);
436 
437 		skb = tcp_write_queue_head(sk);
438 		BUG_ON(!skb);
439 
440 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
441 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
442 
443 		if (remaining) {
444 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
445 						  remaining, TCP_RTO_MAX);
446 		} else {
447 			/* The RTO revert clocked out the retransmission.
448 			 * Will retransmit now. */
449 			tcp_retransmit_timer(sk);
450 		}
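		/* Illustrative numbers (not from the original source): with a
		 * base RTO of 200ms backed off three times (1600ms), undoing
		 * one backoff leaves 800ms; if 300ms have already elapsed
		 * since the head-of-queue segment was timestamped, the timer
		 * is re-armed for the remaining 500ms, otherwise we
		 * retransmit immediately.
		 */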
451 
452 		break;
453 	case ICMP_TIME_EXCEEDED:
454 		err = EHOSTUNREACH;
455 		break;
456 	default:
457 		goto out;
458 	}
459 
460 	switch (sk->sk_state) {
461 		struct request_sock *req, **prev;
462 	case TCP_LISTEN:
463 		if (sock_owned_by_user(sk))
464 			goto out;
465 
466 		req = inet_csk_search_req(sk, &prev, th->dest,
467 					  iph->daddr, iph->saddr);
468 		if (!req)
469 			goto out;
470 
471 		/* ICMPs are not backlogged, hence we cannot get
472 		   an established socket here.
473 		 */
474 		WARN_ON(req->sk);
475 
476 		if (seq != tcp_rsk(req)->snt_isn) {
477 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
478 			goto out;
479 		}
480 
481 		/*
482 		 * Still in SYN_RECV, just remove it silently.
483 		 * There is no good way to pass the error to the newly
484 		 * created socket, and POSIX does not want network
485 		 * errors returned from accept().
486 		 */
487 		inet_csk_reqsk_queue_drop(sk, req, prev);
488 		goto out;
489 
490 	case TCP_SYN_SENT:
491 	case TCP_SYN_RECV:  /* Cannot happen normally.
492 			       It can, for example, if SYNs crossed.
493 			     */
494 		if (!sock_owned_by_user(sk)) {
495 			sk->sk_err = err;
496 
497 			sk->sk_error_report(sk);
498 
499 			tcp_done(sk);
500 		} else {
501 			sk->sk_err_soft = err;
502 		}
503 		goto out;
504 	}
505 
506 	/* If we've already connected we will keep trying
507 	 * until we time out, or the user gives up.
508 	 *
509 	 * RFC 1122 4.2.3.9 allows us to consider as hard errors
510 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
511 	 * but it is obsoleted by pmtu discovery).
512 	 *
513 	 * Note that in the modern internet, where routing is unreliable
514 	 * and broken firewalls sit in every dark corner, sending random
515 	 * errors ordered by their masters, even these two messages finally lose
516 	 * their original sense (even Linux sends invalid PORT_UNREACHs).
517 	 *
518 	 * Now we are in compliance with RFCs.
519 	 *							--ANK (980905)
520 	 */
521 
522 	inet = inet_sk(sk);
523 	if (!sock_owned_by_user(sk) && inet->recverr) {
524 		sk->sk_err = err;
525 		sk->sk_error_report(sk);
526 	} else	{ /* Only an error on timeout */
527 		sk->sk_err_soft = err;
528 	}
529 
530 out:
531 	bh_unlock_sock(sk);
532 	sock_put(sk);
533 }
534 
535 static void __tcp_v4_send_check(struct sk_buff *skb,
536 				__be32 saddr, __be32 daddr)
537 {
538 	struct tcphdr *th = tcp_hdr(skb);
539 
540 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
541 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
542 		skb->csum_start = skb_transport_header(skb) - skb->head;
543 		skb->csum_offset = offsetof(struct tcphdr, check);
544 	} else {
545 		th->check = tcp_v4_check(skb->len, saddr, daddr,
546 					 csum_partial(th,
547 						      th->doff << 2,
548 						      skb->csum));
549 	}
550 }
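/*
 * Note (added for clarity): in the CHECKSUM_PARTIAL branch above only the
 * pseudo-header portion of the checksum is seeded into th->check, and
 * csum_start/csum_offset tell the NIC (or the software fallback) where to
 * finish the job; otherwise the full TCP checksum over header and payload is
 * computed here in software.
 */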
551 
552 /* This routine computes an IPv4 TCP checksum. */
553 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
554 {
555 	const struct inet_sock *inet = inet_sk(sk);
556 
557 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
558 }
559 EXPORT_SYMBOL(tcp_v4_send_check);
560 
561 int tcp_v4_gso_send_check(struct sk_buff *skb)
562 {
563 	const struct iphdr *iph;
564 	struct tcphdr *th;
565 
566 	if (!pskb_may_pull(skb, sizeof(*th)))
567 		return -EINVAL;
568 
569 	iph = ip_hdr(skb);
570 	th = tcp_hdr(skb);
571 
572 	th->check = 0;
573 	skb->ip_summed = CHECKSUM_PARTIAL;
574 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
575 	return 0;
576 }
577 
578 /*
579  *	This routine will send an RST to the other tcp.
580  *
581  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
582  *		      for the reset?
583  *	Answer: if a packet caused an RST, it is not for a socket
584  *		existing in our system; if it is matched to a socket,
585  *		it is just a duplicate segment or a bug in the other side's TCP.
586  *		So we build the reply based only on parameters
587  *		that arrived with the segment.
588  *	Exception: precedence violation. We do not implement it in any case.
589  */
590 
591 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
592 {
593 	const struct tcphdr *th = tcp_hdr(skb);
594 	struct {
595 		struct tcphdr th;
596 #ifdef CONFIG_TCP_MD5SIG
597 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
598 #endif
599 	} rep;
600 	struct ip_reply_arg arg;
601 #ifdef CONFIG_TCP_MD5SIG
602 	struct tcp_md5sig_key *key;
603 #endif
604 	struct net *net;
605 
606 	/* Never send a reset in response to a reset. */
607 	if (th->rst)
608 		return;
609 
610 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
611 		return;
612 
613 	/* Swap the send and the receive. */
614 	memset(&rep, 0, sizeof(rep));
615 	rep.th.dest   = th->source;
616 	rep.th.source = th->dest;
617 	rep.th.doff   = sizeof(struct tcphdr) / 4;
618 	rep.th.rst    = 1;
619 
620 	if (th->ack) {
621 		rep.th.seq = th->ack_seq;
622 	} else {
623 		rep.th.ack = 1;
624 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
625 				       skb->len - (th->doff << 2));
626 	}
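	/* Illustration (added): when the offending segment carried no ACK,
	 * the RST acknowledges everything the peer sent, i.e. its SEQ plus
	 * the payload length plus one for each of SYN and FIN, following
	 * the RFC 793 reset generation rules.
	 */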
627 
628 	memset(&arg, 0, sizeof(arg));
629 	arg.iov[0].iov_base = (unsigned char *)&rep;
630 	arg.iov[0].iov_len  = sizeof(rep.th);
631 
632 #ifdef CONFIG_TCP_MD5SIG
633 	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
634 	if (key) {
635 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
636 				   (TCPOPT_NOP << 16) |
637 				   (TCPOPT_MD5SIG << 8) |
638 				   TCPOLEN_MD5SIG);
639 		/* Update length and the length the header thinks exists */
640 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
641 		rep.th.doff = arg.iov[0].iov_len / 4;
642 
643 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
644 				     key, ip_hdr(skb)->saddr,
645 				     ip_hdr(skb)->daddr, &rep.th);
646 	}
647 #endif
648 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
649 				      ip_hdr(skb)->saddr, /* XXX */
650 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
651 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
652 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
653 
654 	net = dev_net(skb_dst(skb)->dev);
655 	arg.tos = ip_hdr(skb)->tos;
656 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
657 		      &arg, arg.iov[0].iov_len);
658 
659 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
660 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
661 }
662 
663 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
664    outside socket context, is certainly ugly. What can I do?
665  */
666 
667 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
668 			    u32 win, u32 ts, int oif,
669 			    struct tcp_md5sig_key *key,
670 			    int reply_flags, u8 tos)
671 {
672 	const struct tcphdr *th = tcp_hdr(skb);
673 	struct {
674 		struct tcphdr th;
675 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
676 #ifdef CONFIG_TCP_MD5SIG
677 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
678 #endif
679 			];
680 	} rep;
681 	struct ip_reply_arg arg;
682 	struct net *net = dev_net(skb_dst(skb)->dev);
683 
684 	memset(&rep.th, 0, sizeof(struct tcphdr));
685 	memset(&arg, 0, sizeof(arg));
686 
687 	arg.iov[0].iov_base = (unsigned char *)&rep;
688 	arg.iov[0].iov_len  = sizeof(rep.th);
689 	if (ts) {
690 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
691 				   (TCPOPT_TIMESTAMP << 8) |
692 				   TCPOLEN_TIMESTAMP);
693 		rep.opt[1] = htonl(tcp_time_stamp);
694 		rep.opt[2] = htonl(ts);
695 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
696 	}
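	/* Option layout sketch (added): when a timestamp is echoed, the block
	 * built above is NOP, NOP, TIMESTAMP (kind 8, length 10), then our
	 * current tcp_time_stamp and the peer's echoed value, padding the
	 * option space to a 4-byte boundary (TCPOLEN_TSTAMP_ALIGNED = 12).
	 */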
697 
698 	/* Swap the send and the receive. */
699 	rep.th.dest    = th->source;
700 	rep.th.source  = th->dest;
701 	rep.th.doff    = arg.iov[0].iov_len / 4;
702 	rep.th.seq     = htonl(seq);
703 	rep.th.ack_seq = htonl(ack);
704 	rep.th.ack     = 1;
705 	rep.th.window  = htons(win);
706 
707 #ifdef CONFIG_TCP_MD5SIG
708 	if (key) {
709 		int offset = (ts) ? 3 : 0;
710 
711 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
712 					  (TCPOPT_NOP << 16) |
713 					  (TCPOPT_MD5SIG << 8) |
714 					  TCPOLEN_MD5SIG);
715 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
716 		rep.th.doff = arg.iov[0].iov_len/4;
717 
718 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
719 				    key, ip_hdr(skb)->saddr,
720 				    ip_hdr(skb)->daddr, &rep.th);
721 	}
722 #endif
723 	arg.flags = reply_flags;
724 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
725 				      ip_hdr(skb)->saddr, /* XXX */
726 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
727 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
728 	if (oif)
729 		arg.bound_dev_if = oif;
730 	arg.tos = tos;
731 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
732 		      &arg, arg.iov[0].iov_len);
733 
734 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
735 }
736 
737 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
738 {
739 	struct inet_timewait_sock *tw = inet_twsk(sk);
740 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
741 
742 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
743 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
744 			tcptw->tw_ts_recent,
745 			tw->tw_bound_dev_if,
746 			tcp_twsk_md5_key(tcptw),
747 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
748 			tw->tw_tos
749 			);
750 
751 	inet_twsk_put(tw);
752 }
753 
754 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
755 				  struct request_sock *req)
756 {
757 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
758 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
759 			req->ts_recent,
760 			0,
761 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
762 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
763 			ip_hdr(skb)->tos);
764 }
765 
766 /*
767  *	Send a SYN-ACK after having received a SYN.
768  *	This still operates on a request_sock only, not on a big
769  *	socket.
770  */
771 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
772 			      struct request_sock *req,
773 			      struct request_values *rvp)
774 {
775 	const struct inet_request_sock *ireq = inet_rsk(req);
776 	struct flowi4 fl4;
777 	int err = -1;
778 	struct sk_buff * skb;
779 
780 	/* First, grab a route. */
781 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
782 		return -1;
783 
784 	skb = tcp_make_synack(sk, dst, req, rvp);
785 
786 	if (skb) {
787 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
788 
789 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
790 					    ireq->rmt_addr,
791 					    ireq->opt);
792 		err = net_xmit_eval(err);
793 	}
794 
795 	dst_release(dst);
796 	return err;
797 }
798 
799 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
800 			      struct request_values *rvp)
801 {
802 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
803 	return tcp_v4_send_synack(sk, NULL, req, rvp);
804 }
805 
806 /*
807  *	IPv4 request_sock destructor.
808  */
809 static void tcp_v4_reqsk_destructor(struct request_sock *req)
810 {
811 	kfree(inet_rsk(req)->opt);
812 }
813 
814 /*
815  * Return 1 if a syncookie should be sent
816  */
817 int tcp_syn_flood_action(struct sock *sk,
818 			 const struct sk_buff *skb,
819 			 const char *proto)
820 {
821 	const char *msg = "Dropping request";
822 	int want_cookie = 0;
823 	struct listen_sock *lopt;
824 
825 
826 
827 #ifdef CONFIG_SYN_COOKIES
828 	if (sysctl_tcp_syncookies) {
829 		msg = "Sending cookies";
830 		want_cookie = 1;
831 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
832 	} else
833 #endif
834 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
835 
836 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
837 	if (!lopt->synflood_warned) {
838 		lopt->synflood_warned = 1;
839 		pr_info("%s: Possible SYN flooding on port %d. %s. "
840 			" Check SNMP counters.\n",
841 			proto, ntohs(tcp_hdr(skb)->dest), msg);
842 	}
843 	return want_cookie;
844 }
845 EXPORT_SYMBOL(tcp_syn_flood_action);
846 
847 /*
848  * Save and compile IPv4 options into the request_sock if needed.
849  */
850 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
851 						  struct sk_buff *skb)
852 {
853 	const struct ip_options *opt = &(IPCB(skb)->opt);
854 	struct ip_options_rcu *dopt = NULL;
855 
856 	if (opt && opt->optlen) {
857 		int opt_size = sizeof(*dopt) + opt->optlen;
858 
859 		dopt = kmalloc(opt_size, GFP_ATOMIC);
860 		if (dopt) {
861 			if (ip_options_echo(&dopt->opt, skb)) {
862 				kfree(dopt);
863 				dopt = NULL;
864 			}
865 		}
866 	}
867 	return dopt;
868 }
869 
870 #ifdef CONFIG_TCP_MD5SIG
871 /*
872  * RFC2385 MD5 checksumming requires a mapping of
873  * IP address->MD5 Key.
874  * We need to maintain these in the sk structure.
875  */
876 
877 /* Find the Key structure for an address.  */
878 static struct tcp_md5sig_key *
879 			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
880 {
881 	struct tcp_sock *tp = tcp_sk(sk);
882 	int i;
883 
884 	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
885 		return NULL;
886 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
887 		if (tp->md5sig_info->keys4[i].addr == addr)
888 			return &tp->md5sig_info->keys4[i].base;
889 	}
890 	return NULL;
891 }
892 
893 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
894 					 struct sock *addr_sk)
895 {
896 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
897 }
898 EXPORT_SYMBOL(tcp_v4_md5_lookup);
899 
900 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
901 						      struct request_sock *req)
902 {
903 	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
904 }
905 
906 /* This can be called on a newly created socket, from other files */
907 int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
908 		      u8 *newkey, u8 newkeylen)
909 {
910 	/* Add Key to the list */
911 	struct tcp_md5sig_key *key;
912 	struct tcp_sock *tp = tcp_sk(sk);
913 	struct tcp4_md5sig_key *keys;
914 
915 	key = tcp_v4_md5_do_lookup(sk, addr);
916 	if (key) {
917 		/* Pre-existing entry - just update that one. */
918 		kfree(key->key);
919 		key->key = newkey;
920 		key->keylen = newkeylen;
921 	} else {
922 		struct tcp_md5sig_info *md5sig;
923 
924 		if (!tp->md5sig_info) {
925 			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
926 						  GFP_ATOMIC);
927 			if (!tp->md5sig_info) {
928 				kfree(newkey);
929 				return -ENOMEM;
930 			}
931 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
932 		}
933 
934 		md5sig = tp->md5sig_info;
935 		if (md5sig->entries4 == 0 &&
936 		    tcp_alloc_md5sig_pool(sk) == NULL) {
937 			kfree(newkey);
938 			return -ENOMEM;
939 		}
940 
941 		if (md5sig->alloced4 == md5sig->entries4) {
942 			keys = kmalloc((sizeof(*keys) *
943 					(md5sig->entries4 + 1)), GFP_ATOMIC);
944 			if (!keys) {
945 				kfree(newkey);
946 				if (md5sig->entries4 == 0)
947 					tcp_free_md5sig_pool();
948 				return -ENOMEM;
949 			}
950 
951 			if (md5sig->entries4)
952 				memcpy(keys, md5sig->keys4,
953 				       sizeof(*keys) * md5sig->entries4);
954 
955 			/* Free old key list, and reference new one */
956 			kfree(md5sig->keys4);
957 			md5sig->keys4 = keys;
958 			md5sig->alloced4++;
959 		}
960 		md5sig->entries4++;
961 		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
962 		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
963 		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
964 	}
965 	return 0;
966 }
967 EXPORT_SYMBOL(tcp_v4_md5_do_add);
968 
969 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
970 			       u8 *newkey, u8 newkeylen)
971 {
972 	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
973 				 newkey, newkeylen);
974 }
975 
976 int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
977 {
978 	struct tcp_sock *tp = tcp_sk(sk);
979 	int i;
980 
981 	for (i = 0; i < tp->md5sig_info->entries4; i++) {
982 		if (tp->md5sig_info->keys4[i].addr == addr) {
983 			/* Free the key */
984 			kfree(tp->md5sig_info->keys4[i].base.key);
985 			tp->md5sig_info->entries4--;
986 
987 			if (tp->md5sig_info->entries4 == 0) {
988 				kfree(tp->md5sig_info->keys4);
989 				tp->md5sig_info->keys4 = NULL;
990 				tp->md5sig_info->alloced4 = 0;
991 				tcp_free_md5sig_pool();
992 			} else if (tp->md5sig_info->entries4 != i) {
993 				/* Shift the remaining entries down to fill the hole */
994 				memmove(&tp->md5sig_info->keys4[i],
995 					&tp->md5sig_info->keys4[i+1],
996 					(tp->md5sig_info->entries4 - i) *
997 					 sizeof(struct tcp4_md5sig_key));
998 			}
999 			return 0;
1000 		}
1001 	}
1002 	return -ENOENT;
1003 }
1004 EXPORT_SYMBOL(tcp_v4_md5_do_del);
1005 
1006 static void tcp_v4_clear_md5_list(struct sock *sk)
1007 {
1008 	struct tcp_sock *tp = tcp_sk(sk);
1009 
1010 	/* Free each key, then the set of keys,
1011 	 * the crypto element, and then decrement our
1012 	 * hold on the last resort crypto.
1013 	 */
1014 	if (tp->md5sig_info->entries4) {
1015 		int i;
1016 		for (i = 0; i < tp->md5sig_info->entries4; i++)
1017 			kfree(tp->md5sig_info->keys4[i].base.key);
1018 		tp->md5sig_info->entries4 = 0;
1019 		tcp_free_md5sig_pool();
1020 	}
1021 	if (tp->md5sig_info->keys4) {
1022 		kfree(tp->md5sig_info->keys4);
1023 		tp->md5sig_info->keys4 = NULL;
1024 		tp->md5sig_info->alloced4  = 0;
1025 	}
1026 }
1027 
1028 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1029 				 int optlen)
1030 {
1031 	struct tcp_md5sig cmd;
1032 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1033 	u8 *newkey;
1034 
1035 	if (optlen < sizeof(cmd))
1036 		return -EINVAL;
1037 
1038 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1039 		return -EFAULT;
1040 
1041 	if (sin->sin_family != AF_INET)
1042 		return -EINVAL;
1043 
1044 	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1045 		if (!tcp_sk(sk)->md5sig_info)
1046 			return -ENOENT;
1047 		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1048 	}
1049 
1050 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1051 		return -EINVAL;
1052 
1053 	if (!tcp_sk(sk)->md5sig_info) {
1054 		struct tcp_sock *tp = tcp_sk(sk);
1055 		struct tcp_md5sig_info *p;
1056 
1057 		p = kzalloc(sizeof(*p), sk->sk_allocation);
1058 		if (!p)
1059 			return -EINVAL;
1060 
1061 		tp->md5sig_info = p;
1062 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1063 	}
1064 
1065 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1066 	if (!newkey)
1067 		return -ENOMEM;
1068 	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1069 				 newkey, cmd.tcpm_keylen);
1070 }
1071 
1072 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1073 					__be32 daddr, __be32 saddr, int nbytes)
1074 {
1075 	struct tcp4_pseudohdr *bp;
1076 	struct scatterlist sg;
1077 
1078 	bp = &hp->md5_blk.ip4;
1079 
1080 	/*
1081 	 * 1. the TCP pseudo-header (in the order: source IP address,
1082 	 * destination IP address, zero-padded protocol number, and
1083 	 * segment length)
1084 	 */
1085 	bp->saddr = saddr;
1086 	bp->daddr = daddr;
1087 	bp->pad = 0;
1088 	bp->protocol = IPPROTO_TCP;
1089 	bp->len = cpu_to_be16(nbytes);
1090 
1091 	sg_init_one(&sg, bp, sizeof(*bp));
1092 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1093 }
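/*
 * Digest coverage sketch (added, per RFC 2385): the MD5 hash covers the
 * pseudo-header above, then the TCP header with its checksum field zeroed,
 * then the segment data when present, and finally the connection key; see
 * the callers below for the exact sequence of tcp_md5_hash_*() calls.
 */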
1094 
1095 static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1096 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1097 {
1098 	struct tcp_md5sig_pool *hp;
1099 	struct hash_desc *desc;
1100 
1101 	hp = tcp_get_md5sig_pool();
1102 	if (!hp)
1103 		goto clear_hash_noput;
1104 	desc = &hp->md5_desc;
1105 
1106 	if (crypto_hash_init(desc))
1107 		goto clear_hash;
1108 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1109 		goto clear_hash;
1110 	if (tcp_md5_hash_header(hp, th))
1111 		goto clear_hash;
1112 	if (tcp_md5_hash_key(hp, key))
1113 		goto clear_hash;
1114 	if (crypto_hash_final(desc, md5_hash))
1115 		goto clear_hash;
1116 
1117 	tcp_put_md5sig_pool();
1118 	return 0;
1119 
1120 clear_hash:
1121 	tcp_put_md5sig_pool();
1122 clear_hash_noput:
1123 	memset(md5_hash, 0, 16);
1124 	return 1;
1125 }
1126 
1127 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1128 			const struct sock *sk, const struct request_sock *req,
1129 			const struct sk_buff *skb)
1130 {
1131 	struct tcp_md5sig_pool *hp;
1132 	struct hash_desc *desc;
1133 	const struct tcphdr *th = tcp_hdr(skb);
1134 	__be32 saddr, daddr;
1135 
1136 	if (sk) {
1137 		saddr = inet_sk(sk)->inet_saddr;
1138 		daddr = inet_sk(sk)->inet_daddr;
1139 	} else if (req) {
1140 		saddr = inet_rsk(req)->loc_addr;
1141 		daddr = inet_rsk(req)->rmt_addr;
1142 	} else {
1143 		const struct iphdr *iph = ip_hdr(skb);
1144 		saddr = iph->saddr;
1145 		daddr = iph->daddr;
1146 	}
1147 
1148 	hp = tcp_get_md5sig_pool();
1149 	if (!hp)
1150 		goto clear_hash_noput;
1151 	desc = &hp->md5_desc;
1152 
1153 	if (crypto_hash_init(desc))
1154 		goto clear_hash;
1155 
1156 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1157 		goto clear_hash;
1158 	if (tcp_md5_hash_header(hp, th))
1159 		goto clear_hash;
1160 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1161 		goto clear_hash;
1162 	if (tcp_md5_hash_key(hp, key))
1163 		goto clear_hash;
1164 	if (crypto_hash_final(desc, md5_hash))
1165 		goto clear_hash;
1166 
1167 	tcp_put_md5sig_pool();
1168 	return 0;
1169 
1170 clear_hash:
1171 	tcp_put_md5sig_pool();
1172 clear_hash_noput:
1173 	memset(md5_hash, 0, 16);
1174 	return 1;
1175 }
1176 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1177 
1178 static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1179 {
1180 	/*
1181 	 * This gets called for each TCP segment that arrives
1182 	 * so we want to be efficient.
1183 	 * We have 3 drop cases:
1184 	 * o No MD5 hash and one expected.
1185 	 * o MD5 hash and we're not expecting one.
1186 	 * o MD5 hash and it's wrong.
1187 	 */
1188 	const __u8 *hash_location = NULL;
1189 	struct tcp_md5sig_key *hash_expected;
1190 	const struct iphdr *iph = ip_hdr(skb);
1191 	const struct tcphdr *th = tcp_hdr(skb);
1192 	int genhash;
1193 	unsigned char newhash[16];
1194 
1195 	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1196 	hash_location = tcp_parse_md5sig_option(th);
1197 
1198 	/* We've parsed the options - do we have a hash? */
1199 	if (!hash_expected && !hash_location)
1200 		return 0;
1201 
1202 	if (hash_expected && !hash_location) {
1203 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1204 		return 1;
1205 	}
1206 
1207 	if (!hash_expected && hash_location) {
1208 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1209 		return 1;
1210 	}
1211 
1212 	/* Okay, so we have both hash_expected and hash_location;
1213 	 * we need to calculate the MD5 hash and compare.
1214 	 */
1215 	genhash = tcp_v4_md5_hash_skb(newhash,
1216 				      hash_expected,
1217 				      NULL, NULL, skb);
1218 
1219 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1220 		if (net_ratelimit()) {
1221 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1222 			       &iph->saddr, ntohs(th->source),
1223 			       &iph->daddr, ntohs(th->dest),
1224 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1225 		}
1226 		return 1;
1227 	}
1228 	return 0;
1229 }
1230 
1231 #endif
1232 
1233 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1234 	.family		=	PF_INET,
1235 	.obj_size	=	sizeof(struct tcp_request_sock),
1236 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1237 	.send_ack	=	tcp_v4_reqsk_send_ack,
1238 	.destructor	=	tcp_v4_reqsk_destructor,
1239 	.send_reset	=	tcp_v4_send_reset,
1240 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1241 };
1242 
1243 #ifdef CONFIG_TCP_MD5SIG
1244 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1245 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1246 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1247 };
1248 #endif
1249 
1250 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1251 {
1252 	struct tcp_extend_values tmp_ext;
1253 	struct tcp_options_received tmp_opt;
1254 	const u8 *hash_location;
1255 	struct request_sock *req;
1256 	struct inet_request_sock *ireq;
1257 	struct tcp_sock *tp = tcp_sk(sk);
1258 	struct dst_entry *dst = NULL;
1259 	__be32 saddr = ip_hdr(skb)->saddr;
1260 	__be32 daddr = ip_hdr(skb)->daddr;
1261 	__u32 isn = TCP_SKB_CB(skb)->when;
1262 	int want_cookie = 0;
1263 
1264 	/* Never answer SYNs sent to broadcast or multicast */
1265 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1266 		goto drop;
1267 
1268 	/* TW buckets are converted to open requests without
1269 	 * limitation; they conserve resources and the peer is
1270 	 * evidently a real one.
1271 	 */
1272 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1273 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1274 		if (!want_cookie)
1275 			goto drop;
1276 	}
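	/* Illustration (added): when the SYN queue is full and syncookies are
	 * in use, no request_sock is added to the listener's queue; the
	 * connection parameters are instead encoded into the SYN-ACK sequence
	 * number (cookie_v4_init_sequence()) and recovered by
	 * cookie_v4_check() when the final ACK arrives.
	 */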
1277 
1278 	/* Accept backlog is full. If we have already queued enough
1279 	 * warm entries in the syn queue, drop this request. It is better than
1280 	 * clogging the syn queue with openreqs with exponentially increasing
1281 	 * timeout.
1282 	 */
1283 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1284 		goto drop;
1285 
1286 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1287 	if (!req)
1288 		goto drop;
1289 
1290 #ifdef CONFIG_TCP_MD5SIG
1291 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1292 #endif
1293 
1294 	tcp_clear_options(&tmp_opt);
1295 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1296 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1297 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1298 
1299 	if (tmp_opt.cookie_plus > 0 &&
1300 	    tmp_opt.saw_tstamp &&
1301 	    !tp->rx_opt.cookie_out_never &&
1302 	    (sysctl_tcp_cookie_size > 0 ||
1303 	     (tp->cookie_values != NULL &&
1304 	      tp->cookie_values->cookie_desired > 0))) {
1305 		u8 *c;
1306 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1307 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1308 
1309 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1310 			goto drop_and_release;
1311 
1312 		/* Secret recipe starts with IP addresses */
1313 		*mess++ ^= (__force u32)daddr;
1314 		*mess++ ^= (__force u32)saddr;
1315 
1316 		/* plus variable length Initiator Cookie */
1317 		c = (u8 *)mess;
1318 		while (l-- > 0)
1319 			*c++ ^= *hash_location++;
1320 
1321 		want_cookie = 0;	/* not our kind of cookie */
1322 		tmp_ext.cookie_out_never = 0; /* false */
1323 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1324 	} else if (!tp->rx_opt.cookie_in_always) {
1325 		/* redundant indications, but ensure initialization. */
1326 		tmp_ext.cookie_out_never = 1; /* true */
1327 		tmp_ext.cookie_plus = 0;
1328 	} else {
1329 		goto drop_and_release;
1330 	}
1331 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1332 
1333 	if (want_cookie && !tmp_opt.saw_tstamp)
1334 		tcp_clear_options(&tmp_opt);
1335 
1336 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1337 	tcp_openreq_init(req, &tmp_opt, skb);
1338 
1339 	ireq = inet_rsk(req);
1340 	ireq->loc_addr = daddr;
1341 	ireq->rmt_addr = saddr;
1342 	ireq->no_srccheck = inet_sk(sk)->transparent;
1343 	ireq->opt = tcp_v4_save_options(sk, skb);
1344 
1345 	if (security_inet_conn_request(sk, skb, req))
1346 		goto drop_and_free;
1347 
1348 	if (!want_cookie || tmp_opt.tstamp_ok)
1349 		TCP_ECN_create_request(req, tcp_hdr(skb));
1350 
1351 	if (want_cookie) {
1352 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1353 		req->cookie_ts = tmp_opt.tstamp_ok;
1354 	} else if (!isn) {
1355 		struct inet_peer *peer = NULL;
1356 		struct flowi4 fl4;
1357 
1358 		/* VJ's idea. We save the last timestamp seen
1359 		 * from the destination in the peer table when entering
1360 		 * TIME-WAIT state, and check against it before
1361 		 * accepting a new connection request.
1362 		 *
1363 		 * If "isn" is not zero, this request hit a live
1364 		 * timewait bucket, so all the necessary checks
1365 		 * are made in the function processing the timewait state.
1366 		 */
1367 		if (tmp_opt.saw_tstamp &&
1368 		    tcp_death_row.sysctl_tw_recycle &&
1369 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1370 		    fl4.daddr == saddr &&
1371 		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1372 			inet_peer_refcheck(peer);
1373 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1374 			    (s32)(peer->tcp_ts - req->ts_recent) >
1375 							TCP_PAWS_WINDOW) {
1376 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1377 				goto drop_and_release;
1378 			}
1379 		}
1380 		/* Kill the following clause, if you dislike this way. */
1381 		else if (!sysctl_tcp_syncookies &&
1382 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1383 			  (sysctl_max_syn_backlog >> 2)) &&
1384 			 (!peer || !peer->tcp_ts_stamp) &&
1385 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1386 			/* Without syncookies, the last quarter of the
1387 			 * backlog is reserved for destinations
1388 			 * proven to be alive.
1389 			 * This means that we keep communicating
1390 			 * with destinations we already remembered
1391 			 * before the moment of the synflood.
1392 			 */
1393 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1394 				       &saddr, ntohs(tcp_hdr(skb)->source));
1395 			goto drop_and_release;
1396 		}
1397 
1398 		isn = tcp_v4_init_sequence(skb);
1399 	}
1400 	tcp_rsk(req)->snt_isn = isn;
1401 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
1402 
1403 	if (tcp_v4_send_synack(sk, dst, req,
1404 			       (struct request_values *)&tmp_ext) ||
1405 	    want_cookie)
1406 		goto drop_and_free;
1407 
1408 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1409 	return 0;
1410 
1411 drop_and_release:
1412 	dst_release(dst);
1413 drop_and_free:
1414 	reqsk_free(req);
1415 drop:
1416 	return 0;
1417 }
1418 EXPORT_SYMBOL(tcp_v4_conn_request);
1419 
1420 
1421 /*
1422  * The three way handshake has completed - we got a valid synack -
1423  * now create the new socket.
1424  */
1425 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1426 				  struct request_sock *req,
1427 				  struct dst_entry *dst)
1428 {
1429 	struct inet_request_sock *ireq;
1430 	struct inet_sock *newinet;
1431 	struct tcp_sock *newtp;
1432 	struct sock *newsk;
1433 #ifdef CONFIG_TCP_MD5SIG
1434 	struct tcp_md5sig_key *key;
1435 #endif
1436 	struct ip_options_rcu *inet_opt;
1437 
1438 	if (sk_acceptq_is_full(sk))
1439 		goto exit_overflow;
1440 
1441 	newsk = tcp_create_openreq_child(sk, req, skb);
1442 	if (!newsk)
1443 		goto exit_nonewsk;
1444 
1445 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1446 
1447 	newtp		      = tcp_sk(newsk);
1448 	newinet		      = inet_sk(newsk);
1449 	ireq		      = inet_rsk(req);
1450 	newinet->inet_daddr   = ireq->rmt_addr;
1451 	newinet->inet_rcv_saddr = ireq->loc_addr;
1452 	newinet->inet_saddr	      = ireq->loc_addr;
1453 	inet_opt	      = ireq->opt;
1454 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1455 	ireq->opt	      = NULL;
1456 	newinet->mc_index     = inet_iif(skb);
1457 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1458 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1459 	if (inet_opt)
1460 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1461 	newinet->inet_id = newtp->write_seq ^ jiffies;
1462 
1463 	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1464 		goto put_and_exit;
1465 
1466 	sk_setup_caps(newsk, dst);
1467 
1468 	tcp_mtup_init(newsk);
1469 	tcp_sync_mss(newsk, dst_mtu(dst));
1470 	newtp->advmss = dst_metric_advmss(dst);
1471 	if (tcp_sk(sk)->rx_opt.user_mss &&
1472 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1473 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1474 
1475 	tcp_initialize_rcv_mss(newsk);
1476 	if (tcp_rsk(req)->snt_synack)
1477 		tcp_valid_rtt_meas(newsk,
1478 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1479 	newtp->total_retrans = req->retrans;
1480 
1481 #ifdef CONFIG_TCP_MD5SIG
1482 	/* Copy over the MD5 key from the original socket */
1483 	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1484 	if (key != NULL) {
1485 		/*
1486 		 * We're using one, so create a matching key
1487 		 * on the newsk structure. If we fail to get
1488 		 * memory, then we end up not copying the key
1489 		 * across. Shucks.
1490 		 */
1491 		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1492 		if (newkey != NULL)
1493 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1494 					  newkey, key->keylen);
1495 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1496 	}
1497 #endif
1498 
1499 	if (__inet_inherit_port(sk, newsk) < 0)
1500 		goto put_and_exit;
1501 	__inet_hash_nolisten(newsk, NULL);
1502 
1503 	return newsk;
1504 
1505 exit_overflow:
1506 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507 exit_nonewsk:
1508 	dst_release(dst);
1509 exit:
1510 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1511 	return NULL;
1512 put_and_exit:
1513 	bh_unlock_sock(newsk);
1514 	sock_put(newsk);
1515 	goto exit;
1516 }
1517 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1518 
1519 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1520 {
1521 	struct tcphdr *th = tcp_hdr(skb);
1522 	const struct iphdr *iph = ip_hdr(skb);
1523 	struct sock *nsk;
1524 	struct request_sock **prev;
1525 	/* Find possible connection requests. */
1526 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1527 						       iph->saddr, iph->daddr);
1528 	if (req)
1529 		return tcp_check_req(sk, skb, req, prev);
1530 
1531 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1532 			th->source, iph->daddr, th->dest, inet_iif(skb));
1533 
1534 	if (nsk) {
1535 		if (nsk->sk_state != TCP_TIME_WAIT) {
1536 			bh_lock_sock(nsk);
1537 			return nsk;
1538 		}
1539 		inet_twsk_put(inet_twsk(nsk));
1540 		return NULL;
1541 	}
1542 
1543 #ifdef CONFIG_SYN_COOKIES
1544 	if (!th->syn)
1545 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1546 #endif
1547 	return sk;
1548 }
1549 
1550 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1551 {
1552 	const struct iphdr *iph = ip_hdr(skb);
1553 
1554 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1555 		if (!tcp_v4_check(skb->len, iph->saddr,
1556 				  iph->daddr, skb->csum)) {
1557 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1558 			return 0;
1559 		}
1560 	}
1561 
1562 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1563 				       skb->len, IPPROTO_TCP, 0);
1564 
1565 	if (skb->len <= 76) {
1566 		return __skb_checksum_complete(skb);
1567 	}
1568 	return 0;
1569 }
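/*
 * Note (added for clarity): a CHECKSUM_COMPLETE packet whose hardware sum
 * verifies is accepted immediately; otherwise only the pseudo-header sum is
 * seeded into skb->csum here, short packets (<= 76 bytes) are verified in
 * full right away, and longer ones are checked later via
 * tcp_checksum_complete().
 */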
1570 
1571 
1572 /* The socket must have its spinlock held when we get
1573  * here.
1574  *
1575  * We have a potential double-lock case here, so even when
1576  * doing backlog processing we use the BH locking scheme.
1577  * This is because we cannot sleep with the original spinlock
1578  * held.
1579  */
1580 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1581 {
1582 	struct sock *rsk;
1583 #ifdef CONFIG_TCP_MD5SIG
1584 	/*
1585 	 * We really want to reject the packet as early as possible
1586 	 * if:
1587 	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
1588 	 *  o There is an MD5 option and we're not expecting one
1589 	 */
1590 	if (tcp_v4_inbound_md5_hash(sk, skb))
1591 		goto discard;
1592 #endif
1593 
1594 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1595 		sock_rps_save_rxhash(sk, skb);
1596 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1597 			rsk = sk;
1598 			goto reset;
1599 		}
1600 		return 0;
1601 	}
1602 
1603 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1604 		goto csum_err;
1605 
1606 	if (sk->sk_state == TCP_LISTEN) {
1607 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1608 		if (!nsk)
1609 			goto discard;
1610 
1611 		if (nsk != sk) {
1612 			sock_rps_save_rxhash(nsk, skb);
1613 			if (tcp_child_process(sk, nsk, skb)) {
1614 				rsk = nsk;
1615 				goto reset;
1616 			}
1617 			return 0;
1618 		}
1619 	} else
1620 		sock_rps_save_rxhash(sk, skb);
1621 
1622 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1623 		rsk = sk;
1624 		goto reset;
1625 	}
1626 	return 0;
1627 
1628 reset:
1629 	tcp_v4_send_reset(rsk, skb);
1630 discard:
1631 	kfree_skb(skb);
1632 	/* Be careful here. If this function gets more complicated and
1633 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1634 	 * might be destroyed here. This current version compiles correctly,
1635 	 * but you have been warned.
1636 	 */
1637 	return 0;
1638 
1639 csum_err:
1640 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1641 	goto discard;
1642 }
1643 EXPORT_SYMBOL(tcp_v4_do_rcv);
1644 
1645 /*
1646  *	From tcp_input.c
1647  */
1648 
1649 int tcp_v4_rcv(struct sk_buff *skb)
1650 {
1651 	const struct iphdr *iph;
1652 	const struct tcphdr *th;
1653 	struct sock *sk;
1654 	int ret;
1655 	struct net *net = dev_net(skb->dev);
1656 
1657 	if (skb->pkt_type != PACKET_HOST)
1658 		goto discard_it;
1659 
1660 	/* Count it even if it's bad */
1661 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1662 
1663 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1664 		goto discard_it;
1665 
1666 	th = tcp_hdr(skb);
1667 
1668 	if (th->doff < sizeof(struct tcphdr) / 4)
1669 		goto bad_packet;
1670 	if (!pskb_may_pull(skb, th->doff * 4))
1671 		goto discard_it;
1672 
1673 	/* An explanation is required here, I think.
1674 	 * Packet length and doff are validated by header prediction,
1675 	 * provided the case of th->doff == 0 is eliminated.
1676 	 * So, we defer the checks. */
1677 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1678 		goto bad_packet;
1679 
1680 	th = tcp_hdr(skb);
1681 	iph = ip_hdr(skb);
1682 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1683 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1684 				    skb->len - th->doff * 4);
1685 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1686 	TCP_SKB_CB(skb)->when	 = 0;
1687 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1688 	TCP_SKB_CB(skb)->sacked	 = 0;
1689 
1690 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1691 	if (!sk)
1692 		goto no_tcp_socket;
1693 
1694 process:
1695 	if (sk->sk_state == TCP_TIME_WAIT)
1696 		goto do_time_wait;
1697 
1698 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1699 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1700 		goto discard_and_relse;
1701 	}
1702 
1703 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1704 		goto discard_and_relse;
1705 	nf_reset(skb);
1706 
1707 	if (sk_filter(sk, skb))
1708 		goto discard_and_relse;
1709 
1710 	skb->dev = NULL;
1711 
1712 	bh_lock_sock_nested(sk);
1713 	ret = 0;
1714 	if (!sock_owned_by_user(sk)) {
1715 #ifdef CONFIG_NET_DMA
1716 		struct tcp_sock *tp = tcp_sk(sk);
1717 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1718 			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1719 		if (tp->ucopy.dma_chan)
1720 			ret = tcp_v4_do_rcv(sk, skb);
1721 		else
1722 #endif
1723 		{
1724 			if (!tcp_prequeue(sk, skb))
1725 				ret = tcp_v4_do_rcv(sk, skb);
1726 		}
1727 	} else if (unlikely(sk_add_backlog(sk, skb))) {
1728 		bh_unlock_sock(sk);
1729 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1730 		goto discard_and_relse;
1731 	}
1732 	bh_unlock_sock(sk);
1733 
1734 	sock_put(sk);
1735 
1736 	return ret;
1737 
1738 no_tcp_socket:
1739 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1740 		goto discard_it;
1741 
1742 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1743 bad_packet:
1744 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1745 	} else {
1746 		tcp_v4_send_reset(NULL, skb);
1747 	}
1748 
1749 discard_it:
1750 	/* Discard frame. */
1751 	kfree_skb(skb);
1752 	return 0;
1753 
1754 discard_and_relse:
1755 	sock_put(sk);
1756 	goto discard_it;
1757 
1758 do_time_wait:
1759 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1760 		inet_twsk_put(inet_twsk(sk));
1761 		goto discard_it;
1762 	}
1763 
1764 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1765 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1766 		inet_twsk_put(inet_twsk(sk));
1767 		goto discard_it;
1768 	}
1769 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1770 	case TCP_TW_SYN: {
1771 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1772 							&tcp_hashinfo,
1773 							iph->daddr, th->dest,
1774 							inet_iif(skb));
1775 		if (sk2) {
1776 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1777 			inet_twsk_put(inet_twsk(sk));
1778 			sk = sk2;
1779 			goto process;
1780 		}
1781 		/* Fall through to ACK */
1782 	}
1783 	case TCP_TW_ACK:
1784 		tcp_v4_timewait_ack(sk, skb);
1785 		break;
1786 	case TCP_TW_RST:
1787 		goto no_tcp_socket;
1788 	case TCP_TW_SUCCESS:;
1789 	}
1790 	goto discard_it;
1791 }
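/*
 * Note (added): the TCP_TW_SYN case above handles a new SYN arriving for a
 * connection still in TIME-WAIT; if a matching listener exists, the timewait
 * socket is descheduled and released, and the SYN is then processed against
 * the listener as a fresh connection attempt.
 */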
1792 
1793 struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1794 {
1795 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1796 	struct inet_sock *inet = inet_sk(sk);
1797 	struct inet_peer *peer;
1798 
1799 	if (!rt ||
1800 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1801 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
1802 		*release_it = true;
1803 	} else {
1804 		if (!rt->peer)
1805 			rt_bind_peer(rt, inet->inet_daddr, 1);
1806 		peer = rt->peer;
1807 		*release_it = false;
1808 	}
1809 
1810 	return peer;
1811 }
1812 EXPORT_SYMBOL(tcp_v4_get_peer);
1813 
1814 void *tcp_v4_tw_get_peer(struct sock *sk)
1815 {
1816 	const struct inet_timewait_sock *tw = inet_twsk(sk);
1817 
1818 	return inet_getpeer_v4(tw->tw_daddr, 1);
1819 }
1820 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1821 
1822 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1823 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1824 	.twsk_unique	= tcp_twsk_unique,
1825 	.twsk_destructor= tcp_twsk_destructor,
1826 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1827 };
1828 
1829 const struct inet_connection_sock_af_ops ipv4_specific = {
1830 	.queue_xmit	   = ip_queue_xmit,
1831 	.send_check	   = tcp_v4_send_check,
1832 	.rebuild_header	   = inet_sk_rebuild_header,
1833 	.conn_request	   = tcp_v4_conn_request,
1834 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1835 	.get_peer	   = tcp_v4_get_peer,
1836 	.net_header_len	   = sizeof(struct iphdr),
1837 	.setsockopt	   = ip_setsockopt,
1838 	.getsockopt	   = ip_getsockopt,
1839 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1840 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1841 	.bind_conflict	   = inet_csk_bind_conflict,
1842 #ifdef CONFIG_COMPAT
1843 	.compat_setsockopt = compat_ip_setsockopt,
1844 	.compat_getsockopt = compat_ip_getsockopt,
1845 #endif
1846 };
1847 EXPORT_SYMBOL(ipv4_specific);
1848 
1849 #ifdef CONFIG_TCP_MD5SIG
1850 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1851 	.md5_lookup		= tcp_v4_md5_lookup,
1852 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1853 	.md5_add		= tcp_v4_md5_add_func,
1854 	.md5_parse		= tcp_v4_parse_md5_keys,
1855 };
1856 #endif
1857 
1858 /* NOTE: Many fields are already zeroed explicitly by sk_alloc(),
1859  *       so they need not be initialized again here.
1860  */
1861 static int tcp_v4_init_sock(struct sock *sk)
1862 {
1863 	struct inet_connection_sock *icsk = inet_csk(sk);
1864 	struct tcp_sock *tp = tcp_sk(sk);
1865 
1866 	skb_queue_head_init(&tp->out_of_order_queue);
1867 	tcp_init_xmit_timers(sk);
1868 	tcp_prequeue_init(tp);
1869 
1870 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1871 	tp->mdev = TCP_TIMEOUT_INIT;
1872 
1873 	/* So many TCP implementations out there (incorrectly) count the
1874 	 * initial SYN frame in their delayed-ACK and congestion control
1875 	 * algorithms that we must have the following bandaid to talk
1876 	 * efficiently to them.  -DaveM
1877 	 */
1878 	tp->snd_cwnd = TCP_INIT_CWND;
1879 
1880 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1881 	 * initialization of these values.
1882 	 */
1883 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1884 	tp->snd_cwnd_clamp = ~0;
1885 	tp->mss_cache = TCP_MSS_DEFAULT;
1886 
1887 	tp->reordering = sysctl_tcp_reordering;
1888 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1889 
1890 	sk->sk_state = TCP_CLOSE;
1891 
1892 	sk->sk_write_space = sk_stream_write_space;
1893 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1894 
1895 	icsk->icsk_af_ops = &ipv4_specific;
1896 	icsk->icsk_sync_mss = tcp_sync_mss;
1897 #ifdef CONFIG_TCP_MD5SIG
1898 	tp->af_specific = &tcp_sock_ipv4_specific;
1899 #endif
1900 
1901 	/* TCP Cookie Transactions */
1902 	if (sysctl_tcp_cookie_size > 0) {
1903 		/* Default, cookies without s_data_payload. */
1904 		tp->cookie_values =
1905 			kzalloc(sizeof(*tp->cookie_values),
1906 				sk->sk_allocation);
1907 		if (tp->cookie_values != NULL)
1908 			kref_init(&tp->cookie_values->kref);
1909 	}
1910 	/* Presumed zeroed, in order of appearance:
1911 	 *	cookie_in_always, cookie_out_never,
1912 	 *	s_data_constant, s_data_in, s_data_out
1913 	 */
1914 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1915 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1916 
1917 	local_bh_disable();
1918 	percpu_counter_inc(&tcp_sockets_allocated);
1919 	local_bh_enable();
1920 
1921 	return 0;
1922 }
1923 
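/* Release all per-socket TCP state (timers, queues, MD5 keys, the bound
 * port and cached pages) when the socket is being destroyed.
 */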
1924 void tcp_v4_destroy_sock(struct sock *sk)
1925 {
1926 	struct tcp_sock *tp = tcp_sk(sk);
1927 
1928 	tcp_clear_xmit_timers(sk);
1929 
1930 	tcp_cleanup_congestion_control(sk);
1931 
1932 	/* Clean up the write buffer. */
1933 	tcp_write_queue_purge(sk);
1934 
1935 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1936 	__skb_queue_purge(&tp->out_of_order_queue);
1937 
1938 #ifdef CONFIG_TCP_MD5SIG
1939 	/* Clean up the MD5 key list, if any */
1940 	if (tp->md5sig_info) {
1941 		tcp_v4_clear_md5_list(sk);
1942 		kfree(tp->md5sig_info);
1943 		tp->md5sig_info = NULL;
1944 	}
1945 #endif
1946 
1947 #ifdef CONFIG_NET_DMA
1948 	/* Cleans up our sk_async_wait_queue */
1949 	__skb_queue_purge(&sk->sk_async_wait_queue);
1950 #endif
1951 
1952 	/* Clean up the prequeue; it should already be empty. */
1953 	__skb_queue_purge(&tp->ucopy.prequeue);
1954 
1955 	/* Clean up a referenced TCP bind bucket. */
1956 	if (inet_csk(sk)->icsk_bind_hash)
1957 		inet_put_port(sk);
1958 
1959 	/*
1960 	 * If sendmsg cached page exists, toss it.
1961 	 */
1962 	if (sk->sk_sndmsg_page) {
1963 		__free_page(sk->sk_sndmsg_page);
1964 		sk->sk_sndmsg_page = NULL;
1965 	}
1966 
1967 	/* TCP Cookie Transactions */
1968 	if (tp->cookie_values != NULL) {
1969 		kref_put(&tp->cookie_values->kref,
1970 			 tcp_cookie_values_release);
1971 		tp->cookie_values = NULL;
1972 	}
1973 
1974 	percpu_counter_dec(&tcp_sockets_allocated);
1975 }
1976 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1977 
1978 #ifdef CONFIG_PROC_FS
1979 /* Proc filesystem TCP sock list dumping. */
1980 
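/* Helpers for walking the TIME_WAIT chain (twchain) of an ehash bucket. */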
1981 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1982 {
1983 	return hlist_nulls_empty(head) ? NULL :
1984 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1985 }
1986 
1987 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1988 {
1989 	return !is_a_nulls(tw->tw_node.next) ?
1990 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1991 }
1992 
1993 /*
1994  * Get the next listener socket following cur.  If cur is NULL, get the first
1995  * socket starting from the bucket given in st->bucket; when st->bucket is
1996  * zero, the very first socket in the hash table is returned.
1997  */
1998 static void *listening_get_next(struct seq_file *seq, void *cur)
1999 {
2000 	struct inet_connection_sock *icsk;
2001 	struct hlist_nulls_node *node;
2002 	struct sock *sk = cur;
2003 	struct inet_listen_hashbucket *ilb;
2004 	struct tcp_iter_state *st = seq->private;
2005 	struct net *net = seq_file_net(seq);
2006 
2007 	if (!sk) {
2008 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2009 		spin_lock_bh(&ilb->lock);
2010 		sk = sk_nulls_head(&ilb->head);
2011 		st->offset = 0;
2012 		goto get_sk;
2013 	}
2014 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
2015 	++st->num;
2016 	++st->offset;
2017 
2018 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2019 		struct request_sock *req = cur;
2020 
2021 		icsk = inet_csk(st->syn_wait_sk);
2022 		req = req->dl_next;
2023 		while (1) {
2024 			while (req) {
2025 				if (req->rsk_ops->family == st->family) {
2026 					cur = req;
2027 					goto out;
2028 				}
2029 				req = req->dl_next;
2030 			}
2031 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2032 				break;
2033 get_req:
2034 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2035 		}
2036 		sk	  = sk_nulls_next(st->syn_wait_sk);
2037 		st->state = TCP_SEQ_STATE_LISTENING;
2038 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2039 	} else {
2040 		icsk = inet_csk(sk);
2041 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2042 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2043 			goto start_req;
2044 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2045 		sk = sk_nulls_next(sk);
2046 	}
2047 get_sk:
2048 	sk_nulls_for_each_from(sk, node) {
2049 		if (!net_eq(sock_net(sk), net))
2050 			continue;
2051 		if (sk->sk_family == st->family) {
2052 			cur = sk;
2053 			goto out;
2054 		}
2055 		icsk = inet_csk(sk);
2056 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2057 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2058 start_req:
2059 			st->uid		= sock_i_uid(sk);
2060 			st->syn_wait_sk = sk;
2061 			st->state	= TCP_SEQ_STATE_OPENREQ;
2062 			st->sbucket	= 0;
2063 			goto get_req;
2064 		}
2065 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2066 	}
2067 	spin_unlock_bh(&ilb->lock);
2068 	st->offset = 0;
2069 	if (++st->bucket < INET_LHTABLE_SIZE) {
2070 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2071 		spin_lock_bh(&ilb->lock);
2072 		sk = sk_nulls_head(&ilb->head);
2073 		goto get_sk;
2074 	}
2075 	cur = NULL;
2076 out:
2077 	return cur;
2078 }
2079 
2080 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2081 {
2082 	struct tcp_iter_state *st = seq->private;
2083 	void *rc;
2084 
2085 	st->bucket = 0;
2086 	st->offset = 0;
2087 	rc = listening_get_next(seq, NULL);
2088 
2089 	while (rc && *pos) {
2090 		rc = listening_get_next(seq, rc);
2091 		--*pos;
2092 	}
2093 	return rc;
2094 }
2095 
2096 static inline int empty_bucket(struct tcp_iter_state *st)
2097 {
2098 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2099 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2100 }
2101 
2102 /*
2103  * Get the first established socket, starting from the bucket given in st->bucket.
2104  * If st->bucket is zero, the very first socket in the hash is returned.
2105  */
2106 static void *established_get_first(struct seq_file *seq)
2107 {
2108 	struct tcp_iter_state *st = seq->private;
2109 	struct net *net = seq_file_net(seq);
2110 	void *rc = NULL;
2111 
2112 	st->offset = 0;
2113 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2114 		struct sock *sk;
2115 		struct hlist_nulls_node *node;
2116 		struct inet_timewait_sock *tw;
2117 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2118 
2119 		/* Lockless fast path for the common case of empty buckets */
2120 		if (empty_bucket(st))
2121 			continue;
2122 
2123 		spin_lock_bh(lock);
2124 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2125 			if (sk->sk_family != st->family ||
2126 			    !net_eq(sock_net(sk), net)) {
2127 				continue;
2128 			}
2129 			rc = sk;
2130 			goto out;
2131 		}
2132 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2133 		inet_twsk_for_each(tw, node,
2134 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2135 			if (tw->tw_family != st->family ||
2136 			    !net_eq(twsk_net(tw), net)) {
2137 				continue;
2138 			}
2139 			rc = tw;
2140 			goto out;
2141 		}
2142 		spin_unlock_bh(lock);
2143 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2144 	}
2145 out:
2146 	return rc;
2147 }
2148 
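/*
 * Get the next established or TIME_WAIT socket after cur, moving on to the
 * next non-empty ehash bucket once the current chain is exhausted.
 */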
2149 static void *established_get_next(struct seq_file *seq, void *cur)
2150 {
2151 	struct sock *sk = cur;
2152 	struct inet_timewait_sock *tw;
2153 	struct hlist_nulls_node *node;
2154 	struct tcp_iter_state *st = seq->private;
2155 	struct net *net = seq_file_net(seq);
2156 
2157 	++st->num;
2158 	++st->offset;
2159 
2160 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2161 		tw = cur;
2162 		tw = tw_next(tw);
2163 get_tw:
2164 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2165 			tw = tw_next(tw);
2166 		}
2167 		if (tw) {
2168 			cur = tw;
2169 			goto out;
2170 		}
2171 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2172 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2173 
2174 		/* Look for the next non-empty bucket */
2175 		st->offset = 0;
2176 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2177 				empty_bucket(st))
2178 			;
2179 		if (st->bucket > tcp_hashinfo.ehash_mask)
2180 			return NULL;
2181 
2182 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2183 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2184 	} else
2185 		sk = sk_nulls_next(sk);
2186 
2187 	sk_nulls_for_each_from(sk, node) {
2188 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2189 			goto found;
2190 	}
2191 
2192 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2193 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2194 	goto get_tw;
2195 found:
2196 	cur = sk;
2197 out:
2198 	return cur;
2199 }
2200 
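/* Return the entry at offset 'pos' in the established/TIME_WAIT walk,
 * starting from bucket zero.
 */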
2201 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2202 {
2203 	struct tcp_iter_state *st = seq->private;
2204 	void *rc;
2205 
2206 	st->bucket = 0;
2207 	rc = established_get_first(seq);
2208 
2209 	while (rc && pos) {
2210 		rc = established_get_next(seq, rc);
2211 		--pos;
2212 	}
2213 	return rc;
2214 }
2215 
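/* Return the entry at position 'pos', searching the listening hash first
 * and then the established hash.
 */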
2216 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2217 {
2218 	void *rc;
2219 	struct tcp_iter_state *st = seq->private;
2220 
2221 	st->state = TCP_SEQ_STATE_LISTENING;
2222 	rc	  = listening_get_idx(seq, &pos);
2223 
2224 	if (!rc) {
2225 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2226 		rc	  = established_get_idx(seq, pos);
2227 	}
2228 
2229 	return rc;
2230 }
2231 
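/*
 * Resume iteration at the bucket and in-bucket offset saved in the iterator
 * state, so that a sequential read of the proc file can continue where the
 * previous chunk stopped instead of rescanning from the start.
 */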
2232 static void *tcp_seek_last_pos(struct seq_file *seq)
2233 {
2234 	struct tcp_iter_state *st = seq->private;
2235 	int offset = st->offset;
2236 	int orig_num = st->num;
2237 	void *rc = NULL;
2238 
2239 	switch (st->state) {
2240 	case TCP_SEQ_STATE_OPENREQ:
2241 	case TCP_SEQ_STATE_LISTENING:
2242 		if (st->bucket >= INET_LHTABLE_SIZE)
2243 			break;
2244 		st->state = TCP_SEQ_STATE_LISTENING;
2245 		rc = listening_get_next(seq, NULL);
2246 		while (offset-- && rc)
2247 			rc = listening_get_next(seq, rc);
2248 		if (rc)
2249 			break;
2250 		st->bucket = 0;
2251 		/* Fallthrough */
2252 	case TCP_SEQ_STATE_ESTABLISHED:
2253 	case TCP_SEQ_STATE_TIME_WAIT:
2254 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2255 		if (st->bucket > tcp_hashinfo.ehash_mask)
2256 			break;
2257 		rc = established_get_first(seq);
2258 		while (offset-- && rc)
2259 			rc = established_get_next(seq, rc);
2260 	}
2261 
2262 	st->num = orig_num;
2263 
2264 	return rc;
2265 }
2266 
2267 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2268 {
2269 	struct tcp_iter_state *st = seq->private;
2270 	void *rc;
2271 
2272 	if (*pos && *pos == st->last_pos) {
2273 		rc = tcp_seek_last_pos(seq);
2274 		if (rc)
2275 			goto out;
2276 	}
2277 
2278 	st->state = TCP_SEQ_STATE_LISTENING;
2279 	st->num = 0;
2280 	st->bucket = 0;
2281 	st->offset = 0;
2282 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2283 
2284 out:
2285 	st->last_pos = *pos;
2286 	return rc;
2287 }
2288 
2289 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2290 {
2291 	struct tcp_iter_state *st = seq->private;
2292 	void *rc = NULL;
2293 
2294 	if (v == SEQ_START_TOKEN) {
2295 		rc = tcp_get_idx(seq, 0);
2296 		goto out;
2297 	}
2298 
2299 	switch (st->state) {
2300 	case TCP_SEQ_STATE_OPENREQ:
2301 	case TCP_SEQ_STATE_LISTENING:
2302 		rc = listening_get_next(seq, v);
2303 		if (!rc) {
2304 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2305 			st->bucket = 0;
2306 			st->offset = 0;
2307 			rc	  = established_get_first(seq);
2308 		}
2309 		break;
2310 	case TCP_SEQ_STATE_ESTABLISHED:
2311 	case TCP_SEQ_STATE_TIME_WAIT:
2312 		rc = established_get_next(seq, v);
2313 		break;
2314 	}
2315 out:
2316 	++*pos;
2317 	st->last_pos = *pos;
2318 	return rc;
2319 }
2320 
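/* Release whatever locks the iterator still holds for its current state. */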
2321 static void tcp_seq_stop(struct seq_file *seq, void *v)
2322 {
2323 	struct tcp_iter_state *st = seq->private;
2324 
2325 	switch (st->state) {
2326 	case TCP_SEQ_STATE_OPENREQ:
2327 		if (v) {
2328 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2329 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2330 		}
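		/* Fall through to also release the listening bucket lock. */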
2331 	case TCP_SEQ_STATE_LISTENING:
2332 		if (v != SEQ_START_TOKEN)
2333 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2334 		break;
2335 	case TCP_SEQ_STATE_TIME_WAIT:
2336 	case TCP_SEQ_STATE_ESTABLISHED:
2337 		if (v)
2338 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2339 		break;
2340 	}
2341 }
2342 
2343 int tcp_seq_open(struct inode *inode, struct file *file)
2344 {
2345 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2346 	struct tcp_iter_state *s;
2347 	int err;
2348 
2349 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2350 			  sizeof(struct tcp_iter_state));
2351 	if (err < 0)
2352 		return err;
2353 
2354 	s = ((struct seq_file *)file->private_data)->private;
2355 	s->family		= afinfo->family;
2356 	s->last_pos 		= 0;
2357 	return 0;
2358 }
2359 EXPORT_SYMBOL(tcp_seq_open);
2360 
2361 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2362 {
2363 	int rc = 0;
2364 	struct proc_dir_entry *p;
2365 
2366 	afinfo->seq_ops.start		= tcp_seq_start;
2367 	afinfo->seq_ops.next		= tcp_seq_next;
2368 	afinfo->seq_ops.stop		= tcp_seq_stop;
2369 
2370 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2371 			     afinfo->seq_fops, afinfo);
2372 	if (!p)
2373 		rc = -ENOMEM;
2374 	return rc;
2375 }
2376 EXPORT_SYMBOL(tcp_proc_register);
2377 
2378 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2379 {
2380 	proc_net_remove(net, afinfo->name);
2381 }
2382 EXPORT_SYMBOL(tcp_proc_unregister);
2383 
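/* Format one SYN_RECV request socket as a /proc/net/tcp line. */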
2384 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2385 			 struct seq_file *f, int i, int uid, int *len)
2386 {
2387 	const struct inet_request_sock *ireq = inet_rsk(req);
2388 	int ttd = req->expires - jiffies;
2389 
2390 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2391 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2392 		i,
2393 		ireq->loc_addr,
2394 		ntohs(inet_sk(sk)->inet_sport),
2395 		ireq->rmt_addr,
2396 		ntohs(ireq->rmt_port),
2397 		TCP_SYN_RECV,
2398 		0, 0, /* could print option size, but that is af dependent. */
2399 		1,    /* timers active (only the expire timer) */
2400 		jiffies_to_clock_t(ttd),
2401 		req->retrans,
2402 		uid,
2403 		0,  /* non-standard timer */
2404 		0, /* open_requests have no inode */
2405 		atomic_read(&sk->sk_refcnt),
2406 		req,
2407 		len);
2408 }
2409 
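/* Format one listening or established TCP socket as a /proc/net/tcp line. */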
2410 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2411 {
2412 	int timer_active;
2413 	unsigned long timer_expires;
2414 	const struct tcp_sock *tp = tcp_sk(sk);
2415 	const struct inet_connection_sock *icsk = inet_csk(sk);
2416 	const struct inet_sock *inet = inet_sk(sk);
2417 	__be32 dest = inet->inet_daddr;
2418 	__be32 src = inet->inet_rcv_saddr;
2419 	__u16 destp = ntohs(inet->inet_dport);
2420 	__u16 srcp = ntohs(inet->inet_sport);
2421 	int rx_queue;
2422 
2423 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2424 		timer_active	= 1;
2425 		timer_expires	= icsk->icsk_timeout;
2426 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2427 		timer_active	= 4;
2428 		timer_expires	= icsk->icsk_timeout;
2429 	} else if (timer_pending(&sk->sk_timer)) {
2430 		timer_active	= 2;
2431 		timer_expires	= sk->sk_timer.expires;
2432 	} else {
2433 		timer_active	= 0;
2434 		timer_expires = jiffies;
2435 	}
2436 
2437 	if (sk->sk_state == TCP_LISTEN)
2438 		rx_queue = sk->sk_ack_backlog;
2439 	else
2440 		/*
2441 		 * Because we don't lock the socket, we might find a transient negative value.
2442 		 */
2443 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2444 
2445 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2446 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2447 		i, src, srcp, dest, destp, sk->sk_state,
2448 		tp->write_seq - tp->snd_una,
2449 		rx_queue,
2450 		timer_active,
2451 		jiffies_to_clock_t(timer_expires - jiffies),
2452 		icsk->icsk_retransmits,
2453 		sock_i_uid(sk),
2454 		icsk->icsk_probes_out,
2455 		sock_i_ino(sk),
2456 		atomic_read(&sk->sk_refcnt), sk,
2457 		jiffies_to_clock_t(icsk->icsk_rto),
2458 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2459 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2460 		tp->snd_cwnd,
2461 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2462 		len);
2463 }
2464 
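/* Format one TIME_WAIT socket as a /proc/net/tcp line. */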
2465 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2466 			       struct seq_file *f, int i, int *len)
2467 {
2468 	__be32 dest, src;
2469 	__u16 destp, srcp;
2470 	int ttd = tw->tw_ttd - jiffies;
2471 
2472 	if (ttd < 0)
2473 		ttd = 0;
2474 
2475 	dest  = tw->tw_daddr;
2476 	src   = tw->tw_rcv_saddr;
2477 	destp = ntohs(tw->tw_dport);
2478 	srcp  = ntohs(tw->tw_sport);
2479 
2480 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2481 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2482 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2483 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2484 		atomic_read(&tw->tw_refcnt), tw, len);
2485 }
2486 
2487 #define TMPSZ 150
2488 
2489 static int tcp4_seq_show(struct seq_file *seq, void *v)
2490 {
2491 	struct tcp_iter_state *st;
2492 	int len;
2493 
2494 	if (v == SEQ_START_TOKEN) {
2495 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2496 			   "  sl  local_address rem_address   st tx_queue "
2497 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2498 			   "inode");
2499 		goto out;
2500 	}
2501 	st = seq->private;
2502 
2503 	switch (st->state) {
2504 	case TCP_SEQ_STATE_LISTENING:
2505 	case TCP_SEQ_STATE_ESTABLISHED:
2506 		get_tcp4_sock(v, seq, st->num, &len);
2507 		break;
2508 	case TCP_SEQ_STATE_OPENREQ:
2509 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2510 		break;
2511 	case TCP_SEQ_STATE_TIME_WAIT:
2512 		get_timewait4_sock(v, seq, st->num, &len);
2513 		break;
2514 	}
2515 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2516 out:
2517 	return 0;
2518 }
2519 
2520 static const struct file_operations tcp_afinfo_seq_fops = {
2521 	.owner   = THIS_MODULE,
2522 	.open    = tcp_seq_open,
2523 	.read    = seq_read,
2524 	.llseek  = seq_lseek,
2525 	.release = seq_release_net
2526 };
2527 
2528 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2529 	.name		= "tcp",
2530 	.family		= AF_INET,
2531 	.seq_fops	= &tcp_afinfo_seq_fops,
2532 	.seq_ops	= {
2533 		.show		= tcp4_seq_show,
2534 	},
2535 };
2536 
2537 static int __net_init tcp4_proc_init_net(struct net *net)
2538 {
2539 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2540 }
2541 
2542 static void __net_exit tcp4_proc_exit_net(struct net *net)
2543 {
2544 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2545 }
2546 
2547 static struct pernet_operations tcp4_net_ops = {
2548 	.init = tcp4_proc_init_net,
2549 	.exit = tcp4_proc_exit_net,
2550 };
2551 
2552 int __init tcp4_proc_init(void)
2553 {
2554 	return register_pernet_subsys(&tcp4_net_ops);
2555 }
2556 
2557 void tcp4_proc_exit(void)
2558 {
2559 	unregister_pernet_subsys(&tcp4_net_ops);
2560 }
2561 #endif /* CONFIG_PROC_FS */
2562 
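/* GRO receive: when the device supplied a full checksum, verify it against
 * the IPv4 pseudo-header before aggregating; segments with no checksum or a
 * failing one are flagged for flush and left to the normal receive path.
 */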
2563 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2564 {
2565 	const struct iphdr *iph = skb_gro_network_header(skb);
2566 
2567 	switch (skb->ip_summed) {
2568 	case CHECKSUM_COMPLETE:
2569 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2570 				  skb->csum)) {
2571 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2572 			break;
2573 		}
2574 
2575 		/* fall through */
2576 	case CHECKSUM_NONE:
2577 		NAPI_GRO_CB(skb)->flush = 1;
2578 		return NULL;
2579 	}
2580 
2581 	return tcp_gro_receive(head, skb);
2582 }
2583 
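/* GRO complete: restore the pseudo-header checksum and mark the merged skb
 * as TCPv4 GSO before it is handed further up the stack.
 */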
2584 int tcp4_gro_complete(struct sk_buff *skb)
2585 {
2586 	const struct iphdr *iph = ip_hdr(skb);
2587 	struct tcphdr *th = tcp_hdr(skb);
2588 
2589 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2590 				  iph->saddr, iph->daddr, 0);
2591 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2592 
2593 	return tcp_gro_complete(skb);
2594 }
2595 
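/* Protocol hooks used by the core socket layer for IPv4 TCP sockets. */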
2596 struct proto tcp_prot = {
2597 	.name			= "TCP",
2598 	.owner			= THIS_MODULE,
2599 	.close			= tcp_close,
2600 	.connect		= tcp_v4_connect,
2601 	.disconnect		= tcp_disconnect,
2602 	.accept			= inet_csk_accept,
2603 	.ioctl			= tcp_ioctl,
2604 	.init			= tcp_v4_init_sock,
2605 	.destroy		= tcp_v4_destroy_sock,
2606 	.shutdown		= tcp_shutdown,
2607 	.setsockopt		= tcp_setsockopt,
2608 	.getsockopt		= tcp_getsockopt,
2609 	.recvmsg		= tcp_recvmsg,
2610 	.sendmsg		= tcp_sendmsg,
2611 	.sendpage		= tcp_sendpage,
2612 	.backlog_rcv		= tcp_v4_do_rcv,
2613 	.hash			= inet_hash,
2614 	.unhash			= inet_unhash,
2615 	.get_port		= inet_csk_get_port,
2616 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2617 	.sockets_allocated	= &tcp_sockets_allocated,
2618 	.orphan_count		= &tcp_orphan_count,
2619 	.memory_allocated	= &tcp_memory_allocated,
2620 	.memory_pressure	= &tcp_memory_pressure,
2621 	.sysctl_mem		= sysctl_tcp_mem,
2622 	.sysctl_wmem		= sysctl_tcp_wmem,
2623 	.sysctl_rmem		= sysctl_tcp_rmem,
2624 	.max_header		= MAX_TCP_HEADER,
2625 	.obj_size		= sizeof(struct tcp_sock),
2626 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2627 	.twsk_prot		= &tcp_timewait_sock_ops,
2628 	.rsk_prot		= &tcp_request_sock_ops,
2629 	.h.hashinfo		= &tcp_hashinfo,
2630 	.no_autobind		= true,
2631 #ifdef CONFIG_COMPAT
2632 	.compat_setsockopt	= compat_tcp_setsockopt,
2633 	.compat_getsockopt	= compat_tcp_getsockopt,
2634 #endif
2635 };
2636 EXPORT_SYMBOL(tcp_prot);
2637 
2638 
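/* Per-namespace setup: create the kernel control socket used to send RSTs
 * and timewait ACKs for this namespace.
 */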
2639 static int __net_init tcp_sk_init(struct net *net)
2640 {
2641 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2642 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2643 }
2644 
2645 static void __net_exit tcp_sk_exit(struct net *net)
2646 {
2647 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2648 }
2649 
2650 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2651 {
2652 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2653 }
2654 
2655 static struct pernet_operations __net_initdata tcp_sk_ops = {
2656 	.init	   = tcp_sk_init,
2657 	.exit	   = tcp_sk_exit,
2658 	.exit_batch = tcp_sk_exit_batch,
2659 };
2660 
2661 void __init tcp_v4_init(void)
2662 {
2663 	inet_hashinfo_init(&tcp_hashinfo);
2664 	if (register_pernet_subsys(&tcp_sk_ops))
2665 		panic("Failed to create the TCP control socket.\n");
2666 }
2667