xref: /linux/net/ipv4/tcp_ipv4.c (revision 800c5eb7b5eba6cb2a32738d763fd59f0fbcdde4)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 
54 #include <linux/bottom_half.h>
55 #include <linux/types.h>
56 #include <linux/fcntl.h>
57 #include <linux/module.h>
58 #include <linux/random.h>
59 #include <linux/cache.h>
60 #include <linux/jhash.h>
61 #include <linux/init.h>
62 #include <linux/times.h>
63 #include <linux/slab.h>
64 
65 #include <net/net_namespace.h>
66 #include <net/icmp.h>
67 #include <net/inet_hashtables.h>
68 #include <net/tcp.h>
69 #include <net/transp_v6.h>
70 #include <net/ipv6.h>
71 #include <net/inet_common.h>
72 #include <net/timewait_sock.h>
73 #include <net/xfrm.h>
74 #include <net/netdma.h>
75 #include <net/secure_seq.h>
76 #include <net/tcp_memcontrol.h>
77 
78 #include <linux/inet.h>
79 #include <linux/ipv6.h>
80 #include <linux/stddef.h>
81 #include <linux/proc_fs.h>
82 #include <linux/seq_file.h>
83 
84 #include <linux/crypto.h>
85 #include <linux/scatterlist.h>
86 
/*
 * Read by tcp_twsk_unique(): when the caller passes a non-NULL twp, a
 * TIME-WAIT bucket is only handed over if this is non-zero and the
 * bucket's last timestamp is more than one second old.
 */
87 int sysctl_tcp_tw_reuse __read_mostly;
/* Exported below so that code outside this file can read it. */
88 int sysctl_tcp_low_latency __read_mostly;
89 EXPORT_SYMBOL(sysctl_tcp_low_latency);
90 
91 
92 #ifdef CONFIG_TCP_MD5SIG
93 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
94 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
95 #endif
96 
/*
 * Socket lookup tables for TCP.  In this file they are passed to
 * inet_lookup() (ICMP error handling) and __inet_lookup_listener()
 * (RST generation); the symbol is exported for other users.
 */
97 struct inet_hashinfo tcp_hashinfo;
98 EXPORT_SYMBOL(tcp_hashinfo);
99 
100 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
101 {
102 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
103 					  ip_hdr(skb)->saddr,
104 					  tcp_hdr(skb)->dest,
105 					  tcp_hdr(skb)->source);
106 }
107 
108 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
109 {
110 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
111 	struct tcp_sock *tp = tcp_sk(sk);
112 
113 	/* With PAWS, it is safe from the viewpoint
114 	   of data integrity. Even without PAWS it is safe provided sequence
115 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
116 
117 	   Actually, the idea is close to VJ's one, only timestamp cache is
118 	   held not per host, but per port pair and TW bucket is used as state
119 	   holder.
120 
121 	   If TW bucket has been already destroyed we fall back to VJ's scheme
122 	   and use initial timestamp retrieved from peer table.
123 	 */
124 	if (tcptw->tw_ts_recent_stamp &&
125 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
126 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
127 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
128 		if (tp->write_seq == 0)
129 			tp->write_seq = 1;
130 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
131 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
132 		sock_hold(sktw);
133 		return 1;
134 	}
135 
136 	return 0;
137 }
138 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
139 
140 /* This will initiate an outgoing connection. */
141 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
142 {
143 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
144 	struct inet_sock *inet = inet_sk(sk);
145 	struct tcp_sock *tp = tcp_sk(sk);
146 	__be16 orig_sport, orig_dport;
147 	__be32 daddr, nexthop;
148 	struct flowi4 *fl4;
149 	struct rtable *rt;
150 	int err;
151 	struct ip_options_rcu *inet_opt;
152 
153 	if (addr_len < sizeof(struct sockaddr_in))
154 		return -EINVAL;
155 
156 	if (usin->sin_family != AF_INET)
157 		return -EAFNOSUPPORT;
158 
159 	nexthop = daddr = usin->sin_addr.s_addr;
160 	inet_opt = rcu_dereference_protected(inet->inet_opt,
161 					     sock_owned_by_user(sk));
162 	if (inet_opt && inet_opt->opt.srr) {
163 		if (!daddr)
164 			return -EINVAL;
165 		nexthop = inet_opt->opt.faddr;
166 	}
167 
168 	orig_sport = inet->inet_sport;
169 	orig_dport = usin->sin_port;
170 	fl4 = &inet->cork.fl.u.ip4;
171 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
172 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
173 			      IPPROTO_TCP,
174 			      orig_sport, orig_dport, sk, true);
175 	if (IS_ERR(rt)) {
176 		err = PTR_ERR(rt);
177 		if (err == -ENETUNREACH)
178 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179 		return err;
180 	}
181 
182 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
183 		ip_rt_put(rt);
184 		return -ENETUNREACH;
185 	}
186 
187 	if (!inet_opt || !inet_opt->opt.srr)
188 		daddr = fl4->daddr;
189 
190 	if (!inet->inet_saddr)
191 		inet->inet_saddr = fl4->saddr;
192 	inet->inet_rcv_saddr = inet->inet_saddr;
193 
194 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
195 		/* Reset inherited state */
196 		tp->rx_opt.ts_recent	   = 0;
197 		tp->rx_opt.ts_recent_stamp = 0;
198 		tp->write_seq		   = 0;
199 	}
200 
201 	if (tcp_death_row.sysctl_tw_recycle &&
202 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
203 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
204 		/*
205 		 * VJ's idea. We save last timestamp seen from
206 		 * the destination in peer table, when entering state
207 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
208 		 * when trying new connection.
209 		 */
210 		if (peer) {
211 			inet_peer_refcheck(peer);
212 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
213 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
214 				tp->rx_opt.ts_recent = peer->tcp_ts;
215 			}
216 		}
217 	}
218 
219 	inet->inet_dport = usin->sin_port;
220 	inet->inet_daddr = daddr;
221 
222 	inet_csk(sk)->icsk_ext_hdr_len = 0;
223 	if (inet_opt)
224 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
225 
226 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
227 
228 	/* Socket identity is still unknown (sport may be zero).
229 	 * However we set state to SYN-SENT and not releasing socket
230 	 * lock select source port, enter ourselves into the hash tables and
231 	 * complete initialization after this.
232 	 */
233 	tcp_set_state(sk, TCP_SYN_SENT);
234 	err = inet_hash_connect(&tcp_death_row, sk);
235 	if (err)
236 		goto failure;
237 
238 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
239 			       inet->inet_sport, inet->inet_dport, sk);
240 	if (IS_ERR(rt)) {
241 		err = PTR_ERR(rt);
242 		rt = NULL;
243 		goto failure;
244 	}
245 	/* OK, now commit destination to socket.  */
246 	sk->sk_gso_type = SKB_GSO_TCPV4;
247 	sk_setup_caps(sk, &rt->dst);
248 
249 	if (!tp->write_seq)
250 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
251 							   inet->inet_daddr,
252 							   inet->inet_sport,
253 							   usin->sin_port);
254 
255 	inet->inet_id = tp->write_seq ^ jiffies;
256 
257 	err = tcp_connect(sk);
258 	rt = NULL;
259 	if (err)
260 		goto failure;
261 
262 	return 0;
263 
264 failure:
265 	/*
266 	 * This unhashes the socket and releases the local port,
267 	 * if necessary.
268 	 */
269 	tcp_set_state(sk, TCP_CLOSE);
270 	ip_rt_put(rt);
271 	sk->sk_route_caps = 0;
272 	inet->inet_dport = 0;
273 	return err;
274 }
275 EXPORT_SYMBOL(tcp_v4_connect);
276 
277 /*
278  * This routine does path mtu discovery as defined in RFC1191.
279  */
280 static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
281 {
282 	struct dst_entry *dst;
283 	struct inet_sock *inet = inet_sk(sk);
284 
285 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
286 	 * send out by Linux are always <576bytes so they should go through
287 	 * unfragmented).
288 	 */
289 	if (sk->sk_state == TCP_LISTEN)
290 		return;
291 
292 	/* We don't check in the destentry if pmtu discovery is forbidden
293 	 * on this route. We just assume that no packet_to_big packets
294 	 * are send back when pmtu discovery is not active.
295 	 * There is a small race when the user changes this flag in the
296 	 * route, but I think that's acceptable.
297 	 */
298 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
299 		return;
300 
301 	dst->ops->update_pmtu(dst, mtu);
302 
303 	/* Something is about to be wrong... Remember soft error
304 	 * for the case, if this connection will not able to recover.
305 	 */
306 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
307 		sk->sk_err_soft = EMSGSIZE;
308 
309 	mtu = dst_mtu(dst);
310 
311 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
312 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
313 		tcp_sync_mss(sk, mtu);
314 
315 		/* Resend the TCP packet because it's
316 		 * clear that the old packet has been
317 		 * dropped. This is the new "fast" path mtu
318 		 * discovery.
319 		 */
320 		tcp_simple_retransmit(sk);
321 	} /* else let the usual retransmit timer handle it */
322 }
323 
324 /*
325  * This routine is called by the ICMP module when it gets some
326  * sort of error condition.  If err < 0 then the socket should
327  * be closed and the error returned to the user.  If err > 0
328  * it's just the icmp type << 8 | icmp code.  After adjustment
329  * header points to the first 8 bytes of the tcp header.  We need
330  * to find the appropriate port.
331  *
332  * The locking strategy used here is very "optimistic". When
333  * someone else accesses the socket the ICMP is just dropped
334  * and for some paths there is no check at all.
335  * A more general error queue to queue errors for later handling
336  * is probably better.
337  *
338  */
339 
340 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
341 {
342 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
343 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
344 	struct inet_connection_sock *icsk;
345 	struct tcp_sock *tp;
346 	struct inet_sock *inet;
347 	const int type = icmp_hdr(icmp_skb)->type;
348 	const int code = icmp_hdr(icmp_skb)->code;
349 	struct sock *sk;
350 	struct sk_buff *skb;
351 	__u32 seq;
352 	__u32 remaining;
353 	int err;
354 	struct net *net = dev_net(icmp_skb->dev);
355 
356 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
357 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
358 		return;
359 	}
360 
361 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
362 			iph->saddr, th->source, inet_iif(icmp_skb));
363 	if (!sk) {
364 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
365 		return;
366 	}
367 	if (sk->sk_state == TCP_TIME_WAIT) {
368 		inet_twsk_put(inet_twsk(sk));
369 		return;
370 	}
371 
372 	bh_lock_sock(sk);
373 	/* If too many ICMPs get dropped on busy
374 	 * servers this needs to be solved differently.
375 	 */
376 	if (sock_owned_by_user(sk))
377 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
378 
379 	if (sk->sk_state == TCP_CLOSE)
380 		goto out;
381 
382 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
383 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
384 		goto out;
385 	}
386 
387 	icsk = inet_csk(sk);
388 	tp = tcp_sk(sk);
389 	seq = ntohl(th->seq);
390 	if (sk->sk_state != TCP_LISTEN &&
391 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
392 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
393 		goto out;
394 	}
395 
396 	switch (type) {
397 	case ICMP_SOURCE_QUENCH:
398 		/* Just silently ignore these. */
399 		goto out;
400 	case ICMP_PARAMETERPROB:
401 		err = EPROTO;
402 		break;
403 	case ICMP_DEST_UNREACH:
404 		if (code > NR_ICMP_UNREACH)
405 			goto out;
406 
407 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
408 			if (!sock_owned_by_user(sk))
409 				do_pmtu_discovery(sk, iph, info);
410 			goto out;
411 		}
412 
413 		err = icmp_err_convert[code].errno;
414 		/* check if icmp_skb allows revert of backoff
415 		 * (see draft-zimmermann-tcp-lcd) */
416 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
417 			break;
418 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
419 		    !icsk->icsk_backoff)
420 			break;
421 
422 		if (sock_owned_by_user(sk))
423 			break;
424 
425 		icsk->icsk_backoff--;
426 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
427 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
428 		tcp_bound_rto(sk);
429 
430 		skb = tcp_write_queue_head(sk);
431 		BUG_ON(!skb);
432 
433 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
434 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
435 
436 		if (remaining) {
437 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
438 						  remaining, TCP_RTO_MAX);
439 		} else {
440 			/* RTO revert clocked out retransmission.
441 			 * Will retransmit now */
442 			tcp_retransmit_timer(sk);
443 		}
444 
445 		break;
446 	case ICMP_TIME_EXCEEDED:
447 		err = EHOSTUNREACH;
448 		break;
449 	default:
450 		goto out;
451 	}
452 
453 	switch (sk->sk_state) {
454 		struct request_sock *req, **prev;
455 	case TCP_LISTEN:
456 		if (sock_owned_by_user(sk))
457 			goto out;
458 
459 		req = inet_csk_search_req(sk, &prev, th->dest,
460 					  iph->daddr, iph->saddr);
461 		if (!req)
462 			goto out;
463 
464 		/* ICMPs are not backlogged, hence we cannot get
465 		   an established socket here.
466 		 */
467 		WARN_ON(req->sk);
468 
469 		if (seq != tcp_rsk(req)->snt_isn) {
470 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
471 			goto out;
472 		}
473 
474 		/*
475 		 * Still in SYN_RECV, just remove it silently.
476 		 * There is no good way to pass the error to the newly
477 		 * created socket, and POSIX does not want network
478 		 * errors returned from accept().
479 		 */
480 		inet_csk_reqsk_queue_drop(sk, req, prev);
481 		goto out;
482 
483 	case TCP_SYN_SENT:
484 	case TCP_SYN_RECV:  /* Cannot happen.
485 			       It can f.e. if SYNs crossed.
486 			     */
487 		if (!sock_owned_by_user(sk)) {
488 			sk->sk_err = err;
489 
490 			sk->sk_error_report(sk);
491 
492 			tcp_done(sk);
493 		} else {
494 			sk->sk_err_soft = err;
495 		}
496 		goto out;
497 	}
498 
499 	/* If we've already connected we will keep trying
500 	 * until we time out, or the user gives up.
501 	 *
502 	 * rfc1122 4.2.3.9 allows to consider as hard errors
503 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
504 	 * but it is obsoleted by pmtu discovery).
505 	 *
506 	 * Note, that in modern internet, where routing is unreliable
507 	 * and in each dark corner broken firewalls sit, sending random
508 	 * errors ordered by their masters even this two messages finally lose
509 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
510 	 *
511 	 * Now we are in compliance with RFCs.
512 	 *							--ANK (980905)
513 	 */
514 
515 	inet = inet_sk(sk);
516 	if (!sock_owned_by_user(sk) && inet->recverr) {
517 		sk->sk_err = err;
518 		sk->sk_error_report(sk);
519 	} else	{ /* Only an error on timeout */
520 		sk->sk_err_soft = err;
521 	}
522 
523 out:
524 	bh_unlock_sock(sk);
525 	sock_put(sk);
526 }
527 
528 static void __tcp_v4_send_check(struct sk_buff *skb,
529 				__be32 saddr, __be32 daddr)
530 {
531 	struct tcphdr *th = tcp_hdr(skb);
532 
533 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
534 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
535 		skb->csum_start = skb_transport_header(skb) - skb->head;
536 		skb->csum_offset = offsetof(struct tcphdr, check);
537 	} else {
538 		th->check = tcp_v4_check(skb->len, saddr, daddr,
539 					 csum_partial(th,
540 						      th->doff << 2,
541 						      skb->csum));
542 	}
543 }
544 
545 /* This routine computes an IPv4 TCP checksum. */
546 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
547 {
548 	const struct inet_sock *inet = inet_sk(sk);
549 
550 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
551 }
552 EXPORT_SYMBOL(tcp_v4_send_check);
553 
554 int tcp_v4_gso_send_check(struct sk_buff *skb)
555 {
556 	const struct iphdr *iph;
557 	struct tcphdr *th;
558 
559 	if (!pskb_may_pull(skb, sizeof(*th)))
560 		return -EINVAL;
561 
562 	iph = ip_hdr(skb);
563 	th = tcp_hdr(skb);
564 
565 	th->check = 0;
566 	skb->ip_summed = CHECKSUM_PARTIAL;
567 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
568 	return 0;
569 }
570 
571 /*
572  *	This routine will send an RST to the other tcp.
573  *
574  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
575  *		      for reset.
576  *	Answer: if a packet caused RST, it is not for a socket
577  *		existing in our system, if it is matched to a socket,
578  *		it is just duplicate segment or bug in other side's TCP.
579  *		So that we build reply only basing on parameters
580  *		arrived with segment.
581  *	Exception: precedence violation. We do not implement it in any case.
582  */
583 
584 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
585 {
586 	const struct tcphdr *th = tcp_hdr(skb);
587 	struct {
588 		struct tcphdr th;
589 #ifdef CONFIG_TCP_MD5SIG
590 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
591 #endif
592 	} rep;
593 	struct ip_reply_arg arg;
594 #ifdef CONFIG_TCP_MD5SIG
595 	struct tcp_md5sig_key *key;
596 	const __u8 *hash_location = NULL;
597 	unsigned char newhash[16];
598 	int genhash;
599 	struct sock *sk1 = NULL;
600 #endif
601 	struct net *net;
602 
603 	/* Never send a reset in response to a reset. */
604 	if (th->rst)
605 		return;
606 
607 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
608 		return;
609 
610 	/* Swap the send and the receive. */
611 	memset(&rep, 0, sizeof(rep));
612 	rep.th.dest   = th->source;
613 	rep.th.source = th->dest;
614 	rep.th.doff   = sizeof(struct tcphdr) / 4;
615 	rep.th.rst    = 1;
616 
617 	if (th->ack) {
618 		rep.th.seq = th->ack_seq;
619 	} else {
620 		rep.th.ack = 1;
621 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
622 				       skb->len - (th->doff << 2));
623 	}
624 
625 	memset(&arg, 0, sizeof(arg));
626 	arg.iov[0].iov_base = (unsigned char *)&rep;
627 	arg.iov[0].iov_len  = sizeof(rep.th);
628 
629 #ifdef CONFIG_TCP_MD5SIG
630 	hash_location = tcp_parse_md5sig_option(th);
631 	if (!sk && hash_location) {
632 		/*
633 		 * active side is lost. Try to find listening socket through
634 		 * source port, and then find md5 key through listening socket.
635 		 * we are not loose security here:
636 		 * Incoming packet is checked with md5 hash with finding key,
637 		 * no RST generated if md5 hash doesn't match.
638 		 */
639 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
640 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
641 					     ntohs(th->source), inet_iif(skb));
642 		/* don't send rst if it can't find key */
643 		if (!sk1)
644 			return;
645 		rcu_read_lock();
646 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
647 					&ip_hdr(skb)->saddr, AF_INET);
648 		if (!key)
649 			goto release_sk1;
650 
651 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
652 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
653 			goto release_sk1;
654 	} else {
655 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
656 					     &ip_hdr(skb)->saddr,
657 					     AF_INET) : NULL;
658 	}
659 
660 	if (key) {
661 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
662 				   (TCPOPT_NOP << 16) |
663 				   (TCPOPT_MD5SIG << 8) |
664 				   TCPOLEN_MD5SIG);
665 		/* Update length and the length the header thinks exists */
666 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
667 		rep.th.doff = arg.iov[0].iov_len / 4;
668 
669 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
670 				     key, ip_hdr(skb)->saddr,
671 				     ip_hdr(skb)->daddr, &rep.th);
672 	}
673 #endif
674 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
675 				      ip_hdr(skb)->saddr, /* XXX */
676 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
677 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
678 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
679 
680 	net = dev_net(skb_dst(skb)->dev);
681 	arg.tos = ip_hdr(skb)->tos;
682 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
683 		      &arg, arg.iov[0].iov_len);
684 
685 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
686 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
687 
688 #ifdef CONFIG_TCP_MD5SIG
689 release_sk1:
690 	if (sk1) {
691 		rcu_read_unlock();
692 		sock_put(sk1);
693 	}
694 #endif
695 }
696 
697 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
698    outside socket context is ugly, certainly. What can I do?
699  */
700 
701 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
702 			    u32 win, u32 ts, int oif,
703 			    struct tcp_md5sig_key *key,
704 			    int reply_flags, u8 tos)
705 {
706 	const struct tcphdr *th = tcp_hdr(skb);
707 	struct {
708 		struct tcphdr th;
709 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
710 #ifdef CONFIG_TCP_MD5SIG
711 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
712 #endif
713 			];
714 	} rep;
715 	struct ip_reply_arg arg;
716 	struct net *net = dev_net(skb_dst(skb)->dev);
717 
718 	memset(&rep.th, 0, sizeof(struct tcphdr));
719 	memset(&arg, 0, sizeof(arg));
720 
721 	arg.iov[0].iov_base = (unsigned char *)&rep;
722 	arg.iov[0].iov_len  = sizeof(rep.th);
723 	if (ts) {
724 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
725 				   (TCPOPT_TIMESTAMP << 8) |
726 				   TCPOLEN_TIMESTAMP);
727 		rep.opt[1] = htonl(tcp_time_stamp);
728 		rep.opt[2] = htonl(ts);
729 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
730 	}
731 
732 	/* Swap the send and the receive. */
733 	rep.th.dest    = th->source;
734 	rep.th.source  = th->dest;
735 	rep.th.doff    = arg.iov[0].iov_len / 4;
736 	rep.th.seq     = htonl(seq);
737 	rep.th.ack_seq = htonl(ack);
738 	rep.th.ack     = 1;
739 	rep.th.window  = htons(win);
740 
741 #ifdef CONFIG_TCP_MD5SIG
742 	if (key) {
743 		int offset = (ts) ? 3 : 0;
744 
745 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
746 					  (TCPOPT_NOP << 16) |
747 					  (TCPOPT_MD5SIG << 8) |
748 					  TCPOLEN_MD5SIG);
749 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
750 		rep.th.doff = arg.iov[0].iov_len/4;
751 
752 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
753 				    key, ip_hdr(skb)->saddr,
754 				    ip_hdr(skb)->daddr, &rep.th);
755 	}
756 #endif
757 	arg.flags = reply_flags;
758 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
759 				      ip_hdr(skb)->saddr, /* XXX */
760 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
761 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
762 	if (oif)
763 		arg.bound_dev_if = oif;
764 	arg.tos = tos;
765 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
766 		      &arg, arg.iov[0].iov_len);
767 
768 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
769 }
770 
771 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
772 {
773 	struct inet_timewait_sock *tw = inet_twsk(sk);
774 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
775 
776 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
777 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
778 			tcptw->tw_ts_recent,
779 			tw->tw_bound_dev_if,
780 			tcp_twsk_md5_key(tcptw),
781 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
782 			tw->tw_tos
783 			);
784 
785 	inet_twsk_put(tw);
786 }
787 
788 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
789 				  struct request_sock *req)
790 {
791 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
792 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
793 			req->ts_recent,
794 			0,
795 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
796 					  AF_INET),
797 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
798 			ip_hdr(skb)->tos);
799 }
800 
801 /*
802  *	Send a SYN-ACK after having received a SYN.
803  *	This still operates on a request_sock only, not on a big
804  *	socket.
805  */
806 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
807 			      struct request_sock *req,
808 			      struct request_values *rvp)
809 {
810 	const struct inet_request_sock *ireq = inet_rsk(req);
811 	struct flowi4 fl4;
812 	int err = -1;
813 	struct sk_buff * skb;
814 
815 	/* First, grab a route. */
816 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
817 		return -1;
818 
819 	skb = tcp_make_synack(sk, dst, req, rvp);
820 
821 	if (skb) {
822 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
823 
824 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
825 					    ireq->rmt_addr,
826 					    ireq->opt);
827 		err = net_xmit_eval(err);
828 	}
829 
830 	dst_release(dst);
831 	return err;
832 }
833 
834 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
835 			      struct request_values *rvp)
836 {
837 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
838 	return tcp_v4_send_synack(sk, NULL, req, rvp);
839 }
840 
841 /*
842  *	IPv4 request_sock destructor.
843  */
844 static void tcp_v4_reqsk_destructor(struct request_sock *req)
845 {
846 	kfree(inet_rsk(req)->opt);
847 }
848 
849 /*
850  * Return 1 if a syncookie should be sent
851  */
852 int tcp_syn_flood_action(struct sock *sk,
853 			 const struct sk_buff *skb,
854 			 const char *proto)
855 {
856 	const char *msg = "Dropping request";
857 	int want_cookie = 0;
858 	struct listen_sock *lopt;
859 
860 
861 
862 #ifdef CONFIG_SYN_COOKIES
863 	if (sysctl_tcp_syncookies) {
864 		msg = "Sending cookies";
865 		want_cookie = 1;
866 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
867 	} else
868 #endif
869 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
870 
871 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
872 	if (!lopt->synflood_warned) {
873 		lopt->synflood_warned = 1;
874 		pr_info("%s: Possible SYN flooding on port %d. %s. "
875 			" Check SNMP counters.\n",
876 			proto, ntohs(tcp_hdr(skb)->dest), msg);
877 	}
878 	return want_cookie;
879 }
880 EXPORT_SYMBOL(tcp_syn_flood_action);
881 
882 /*
883  * Save and compile IPv4 options into the request_sock if needed.
884  */
885 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
886 						  struct sk_buff *skb)
887 {
888 	const struct ip_options *opt = &(IPCB(skb)->opt);
889 	struct ip_options_rcu *dopt = NULL;
890 
891 	if (opt && opt->optlen) {
892 		int opt_size = sizeof(*dopt) + opt->optlen;
893 
894 		dopt = kmalloc(opt_size, GFP_ATOMIC);
895 		if (dopt) {
896 			if (ip_options_echo(&dopt->opt, skb)) {
897 				kfree(dopt);
898 				dopt = NULL;
899 			}
900 		}
901 	}
902 	return dopt;
903 }
904 
905 #ifdef CONFIG_TCP_MD5SIG
906 /*
907  * RFC2385 MD5 checksumming requires a mapping of
908  * IP address->MD5 Key.
909  * We need to maintain these in the sk structure.
910  */
911 
912 /* Find the Key structure for an address.  */
913 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
914 					 const union tcp_md5_addr *addr,
915 					 int family)
916 {
917 	struct tcp_sock *tp = tcp_sk(sk);
918 	struct tcp_md5sig_key *key;
919 	struct hlist_node *pos;
920 	unsigned int size = sizeof(struct in_addr);
921 	struct tcp_md5sig_info *md5sig;
922 
923 	/* caller either holds rcu_read_lock() or socket lock */
924 	md5sig = rcu_dereference_check(tp->md5sig_info,
925 				       sock_owned_by_user(sk));
926 	if (!md5sig)
927 		return NULL;
928 #if IS_ENABLED(CONFIG_IPV6)
929 	if (family == AF_INET6)
930 		size = sizeof(struct in6_addr);
931 #endif
932 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
933 		if (key->family != family)
934 			continue;
935 		if (!memcmp(&key->addr, addr, size))
936 			return key;
937 	}
938 	return NULL;
939 }
940 EXPORT_SYMBOL(tcp_md5_do_lookup);
941 
942 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
943 					 struct sock *addr_sk)
944 {
945 	union tcp_md5_addr *addr;
946 
947 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
948 	return tcp_md5_do_lookup(sk, addr, AF_INET);
949 }
950 EXPORT_SYMBOL(tcp_v4_md5_lookup);
951 
952 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
953 						      struct request_sock *req)
954 {
955 	union tcp_md5_addr *addr;
956 
957 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
958 	return tcp_md5_do_lookup(sk, addr, AF_INET);
959 }
960 
961 /* This can be called on a newly created socket, from other files */
962 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
963 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
964 {
965 	/* Add Key to the list */
966 	struct tcp_md5sig_key *key;
967 	struct tcp_sock *tp = tcp_sk(sk);
968 	struct tcp_md5sig_info *md5sig;
969 
970 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
971 	if (key) {
972 		/* Pre-existing entry - just update that one. */
973 		memcpy(key->key, newkey, newkeylen);
974 		key->keylen = newkeylen;
975 		return 0;
976 	}
977 
978 	md5sig = rcu_dereference_protected(tp->md5sig_info,
979 					   sock_owned_by_user(sk));
980 	if (!md5sig) {
981 		md5sig = kmalloc(sizeof(*md5sig), gfp);
982 		if (!md5sig)
983 			return -ENOMEM;
984 
985 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
986 		INIT_HLIST_HEAD(&md5sig->head);
987 		rcu_assign_pointer(tp->md5sig_info, md5sig);
988 	}
989 
990 	key = sock_kmalloc(sk, sizeof(*key), gfp);
991 	if (!key)
992 		return -ENOMEM;
993 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
994 		sock_kfree_s(sk, key, sizeof(*key));
995 		return -ENOMEM;
996 	}
997 
998 	memcpy(key->key, newkey, newkeylen);
999 	key->keylen = newkeylen;
1000 	key->family = family;
1001 	memcpy(&key->addr, addr,
1002 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1003 				      sizeof(struct in_addr));
1004 	hlist_add_head_rcu(&key->node, &md5sig->head);
1005 	return 0;
1006 }
1007 EXPORT_SYMBOL(tcp_md5_do_add);
1008 
1009 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1010 {
1011 	struct tcp_sock *tp = tcp_sk(sk);
1012 	struct tcp_md5sig_key *key;
1013 	struct tcp_md5sig_info *md5sig;
1014 
1015 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1016 	if (!key)
1017 		return -ENOENT;
1018 	hlist_del_rcu(&key->node);
1019 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1020 	kfree_rcu(key, rcu);
1021 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1022 					   sock_owned_by_user(sk));
1023 	if (hlist_empty(&md5sig->head))
1024 		tcp_free_md5sig_pool();
1025 	return 0;
1026 }
1027 EXPORT_SYMBOL(tcp_md5_do_del);
1028 
1029 void tcp_clear_md5_list(struct sock *sk)
1030 {
1031 	struct tcp_sock *tp = tcp_sk(sk);
1032 	struct tcp_md5sig_key *key;
1033 	struct hlist_node *pos, *n;
1034 	struct tcp_md5sig_info *md5sig;
1035 
1036 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1037 
1038 	if (!hlist_empty(&md5sig->head))
1039 		tcp_free_md5sig_pool();
1040 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1041 		hlist_del_rcu(&key->node);
1042 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1043 		kfree_rcu(key, rcu);
1044 	}
1045 }
1046 
1047 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1048 				 int optlen)
1049 {
1050 	struct tcp_md5sig cmd;
1051 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1052 
1053 	if (optlen < sizeof(cmd))
1054 		return -EINVAL;
1055 
1056 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1057 		return -EFAULT;
1058 
1059 	if (sin->sin_family != AF_INET)
1060 		return -EINVAL;
1061 
1062 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1063 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1064 				      AF_INET);
1065 
1066 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1067 		return -EINVAL;
1068 
1069 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1070 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1071 			      GFP_KERNEL);
1072 }
1073 
1074 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1075 					__be32 daddr, __be32 saddr, int nbytes)
1076 {
1077 	struct tcp4_pseudohdr *bp;
1078 	struct scatterlist sg;
1079 
1080 	bp = &hp->md5_blk.ip4;
1081 
1082 	/*
1083 	 * 1. the TCP pseudo-header (in the order: source IP address,
1084 	 * destination IP address, zero-padded protocol number, and
1085 	 * segment length)
1086 	 */
1087 	bp->saddr = saddr;
1088 	bp->daddr = daddr;
1089 	bp->pad = 0;
1090 	bp->protocol = IPPROTO_TCP;
1091 	bp->len = cpu_to_be16(nbytes);
1092 
1093 	sg_init_one(&sg, bp, sizeof(*bp));
1094 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1095 }
1096 
1097 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1098 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1099 {
1100 	struct tcp_md5sig_pool *hp;
1101 	struct hash_desc *desc;
1102 
1103 	hp = tcp_get_md5sig_pool();
1104 	if (!hp)
1105 		goto clear_hash_noput;
1106 	desc = &hp->md5_desc;
1107 
1108 	if (crypto_hash_init(desc))
1109 		goto clear_hash;
1110 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1111 		goto clear_hash;
1112 	if (tcp_md5_hash_header(hp, th))
1113 		goto clear_hash;
1114 	if (tcp_md5_hash_key(hp, key))
1115 		goto clear_hash;
1116 	if (crypto_hash_final(desc, md5_hash))
1117 		goto clear_hash;
1118 
1119 	tcp_put_md5sig_pool();
1120 	return 0;
1121 
1122 clear_hash:
1123 	tcp_put_md5sig_pool();
1124 clear_hash_noput:
1125 	memset(md5_hash, 0, 16);
1126 	return 1;
1127 }
1128 
1129 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1130 			const struct sock *sk, const struct request_sock *req,
1131 			const struct sk_buff *skb)
1132 {
1133 	struct tcp_md5sig_pool *hp;
1134 	struct hash_desc *desc;
1135 	const struct tcphdr *th = tcp_hdr(skb);
1136 	__be32 saddr, daddr;
1137 
1138 	if (sk) {
1139 		saddr = inet_sk(sk)->inet_saddr;
1140 		daddr = inet_sk(sk)->inet_daddr;
1141 	} else if (req) {
1142 		saddr = inet_rsk(req)->loc_addr;
1143 		daddr = inet_rsk(req)->rmt_addr;
1144 	} else {
1145 		const struct iphdr *iph = ip_hdr(skb);
1146 		saddr = iph->saddr;
1147 		daddr = iph->daddr;
1148 	}
1149 
1150 	hp = tcp_get_md5sig_pool();
1151 	if (!hp)
1152 		goto clear_hash_noput;
1153 	desc = &hp->md5_desc;
1154 
1155 	if (crypto_hash_init(desc))
1156 		goto clear_hash;
1157 
1158 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1159 		goto clear_hash;
1160 	if (tcp_md5_hash_header(hp, th))
1161 		goto clear_hash;
1162 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1163 		goto clear_hash;
1164 	if (tcp_md5_hash_key(hp, key))
1165 		goto clear_hash;
1166 	if (crypto_hash_final(desc, md5_hash))
1167 		goto clear_hash;
1168 
1169 	tcp_put_md5sig_pool();
1170 	return 0;
1171 
1172 clear_hash:
1173 	tcp_put_md5sig_pool();
1174 clear_hash_noput:
1175 	memset(md5_hash, 0, 16);
1176 	return 1;
1177 }
1178 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1179 
1180 static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1181 {
1182 	/*
1183 	 * This gets called for each TCP segment that arrives
1184 	 * so we want to be efficient.
1185 	 * We have 3 drop cases:
1186 	 * o No MD5 hash and one expected.
1187 	 * o MD5 hash and we're not expecting one.
1188 	 * o MD5 hash and its wrong.
1189 	 */
1190 	const __u8 *hash_location = NULL;
1191 	struct tcp_md5sig_key *hash_expected;
1192 	const struct iphdr *iph = ip_hdr(skb);
1193 	const struct tcphdr *th = tcp_hdr(skb);
1194 	int genhash;
1195 	unsigned char newhash[16];
1196 
1197 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1198 					  AF_INET);
1199 	hash_location = tcp_parse_md5sig_option(th);
1200 
1201 	/* We've parsed the options - do we have a hash? */
1202 	if (!hash_expected && !hash_location)
1203 		return 0;
1204 
1205 	if (hash_expected && !hash_location) {
1206 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1207 		return 1;
1208 	}
1209 
1210 	if (!hash_expected && hash_location) {
1211 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1212 		return 1;
1213 	}
1214 
1215 	/* Okay, so this is hash_expected and hash_location -
1216 	 * so we need to calculate the checksum.
1217 	 */
1218 	genhash = tcp_v4_md5_hash_skb(newhash,
1219 				      hash_expected,
1220 				      NULL, NULL, skb);
1221 
1222 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1223 		if (net_ratelimit()) {
1224 			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1225 			       &iph->saddr, ntohs(th->source),
1226 			       &iph->daddr, ntohs(th->dest),
1227 			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1228 		}
1229 		return 1;
1230 	}
1231 	return 0;
1232 }
1233 
1234 #endif
1235 
1236 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1237 	.family		=	PF_INET,
1238 	.obj_size	=	sizeof(struct tcp_request_sock),
1239 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1240 	.send_ack	=	tcp_v4_reqsk_send_ack,
1241 	.destructor	=	tcp_v4_reqsk_destructor,
1242 	.send_reset	=	tcp_v4_send_reset,
1243 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1244 };
1245 
1246 #ifdef CONFIG_TCP_MD5SIG
1247 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1248 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1249 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1250 };
1251 #endif
1252 
/*
 * Handle an incoming SYN (@skb) received on listening socket @sk.
 *
 * Allocates a request sock, parses the TCP options of the SYN, chooses
 * the initial sequence number (a syncookie when tcp_syn_flood_action()
 * asked for one), sends the SYN-ACK and, unless a syncookie was used,
 * adds the request to the listener's request queue.
 *
 * Every path returns 0; a SYN that cannot be served is simply dropped.
 */
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	int want_cookie = 0;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	/* With a syncookie and no timestamp option, forget the parsed options. */
	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr &&
		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	/* When a syncookie was sent the request is not queued: it is freed
	 * right after the SYN-ACK, exactly as when sending the SYN-ACK fails.
	 */
	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

	/* drop_and_release additionally releases the route held in dst. */
drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
1420 }
1421 EXPORT_SYMBOL(tcp_v4_conn_request);
1422 
1423 
1424 /*
1425  * The three way handshake has completed - we got a valid synack -
1426  * now create the new socket.
1427  */
1428 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1429 				  struct request_sock *req,
1430 				  struct dst_entry *dst)
1431 {
1432 	struct inet_request_sock *ireq;
1433 	struct inet_sock *newinet;
1434 	struct tcp_sock *newtp;
1435 	struct sock *newsk;
1436 #ifdef CONFIG_TCP_MD5SIG
1437 	struct tcp_md5sig_key *key;
1438 #endif
1439 	struct ip_options_rcu *inet_opt;
1440 
1441 	if (sk_acceptq_is_full(sk))
1442 		goto exit_overflow;
1443 
1444 	newsk = tcp_create_openreq_child(sk, req, skb);
1445 	if (!newsk)
1446 		goto exit_nonewsk;
1447 
1448 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1449 
1450 	newtp		      = tcp_sk(newsk);
1451 	newinet		      = inet_sk(newsk);
1452 	ireq		      = inet_rsk(req);
1453 	newinet->inet_daddr   = ireq->rmt_addr;
1454 	newinet->inet_rcv_saddr = ireq->loc_addr;
1455 	newinet->inet_saddr	      = ireq->loc_addr;
1456 	inet_opt	      = ireq->opt;
1457 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1458 	ireq->opt	      = NULL;
1459 	newinet->mc_index     = inet_iif(skb);
1460 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1461 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1462 	if (inet_opt)
1463 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1464 	newinet->inet_id = newtp->write_seq ^ jiffies;
1465 
1466 	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
1467 		goto put_and_exit;
1468 
1469 	sk_setup_caps(newsk, dst);
1470 
1471 	tcp_mtup_init(newsk);
1472 	tcp_sync_mss(newsk, dst_mtu(dst));
1473 	newtp->advmss = dst_metric_advmss(dst);
1474 	if (tcp_sk(sk)->rx_opt.user_mss &&
1475 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1476 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1477 
1478 	tcp_initialize_rcv_mss(newsk);
1479 	if (tcp_rsk(req)->snt_synack)
1480 		tcp_valid_rtt_meas(newsk,
1481 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1482 	newtp->total_retrans = req->retrans;
1483 
1484 #ifdef CONFIG_TCP_MD5SIG
1485 	/* Copy over the MD5 key from the original socket */
1486 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1487 				AF_INET);
1488 	if (key != NULL) {
1489 		/*
1490 		 * We're using one, so create a matching key
1491 		 * on the newsk structure. If we fail to get
1492 		 * memory, then we end up not copying the key
1493 		 * across. Shucks.
1494 		 */
1495 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1496 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1497 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1498 	}
1499 #endif
1500 
1501 	if (__inet_inherit_port(sk, newsk) < 0)
1502 		goto put_and_exit;
1503 	__inet_hash_nolisten(newsk, NULL);
1504 
1505 	return newsk;
1506 
1507 exit_overflow:
1508 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1509 exit_nonewsk:
1510 	dst_release(dst);
1511 exit:
1512 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1513 	return NULL;
1514 put_and_exit:
1515 	tcp_clear_xmit_timers(newsk);
1516 	tcp_cleanup_congestion_control(newsk);
1517 	bh_unlock_sock(newsk);
1518 	sock_put(newsk);
1519 	goto exit;
1520 }
1521 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1522 
1523 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1524 {
1525 	struct tcphdr *th = tcp_hdr(skb);
1526 	const struct iphdr *iph = ip_hdr(skb);
1527 	struct sock *nsk;
1528 	struct request_sock **prev;
1529 	/* Find possible connection requests. */
1530 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1531 						       iph->saddr, iph->daddr);
1532 	if (req)
1533 		return tcp_check_req(sk, skb, req, prev);
1534 
1535 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1536 			th->source, iph->daddr, th->dest, inet_iif(skb));
1537 
1538 	if (nsk) {
1539 		if (nsk->sk_state != TCP_TIME_WAIT) {
1540 			bh_lock_sock(nsk);
1541 			return nsk;
1542 		}
1543 		inet_twsk_put(inet_twsk(nsk));
1544 		return NULL;
1545 	}
1546 
1547 #ifdef CONFIG_SYN_COOKIES
1548 	if (!th->syn)
1549 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1550 #endif
1551 	return sk;
1552 }
1553 
1554 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1555 {
1556 	const struct iphdr *iph = ip_hdr(skb);
1557 
1558 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1559 		if (!tcp_v4_check(skb->len, iph->saddr,
1560 				  iph->daddr, skb->csum)) {
1561 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1562 			return 0;
1563 		}
1564 	}
1565 
1566 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1567 				       skb->len, IPPROTO_TCP, 0);
1568 
1569 	if (skb->len <= 76) {
1570 		return __skb_checksum_complete(skb);
1571 	}
1572 	return 0;
1573 }
1574 
1575 
1576 /* The socket must have it's spinlock held when we get
1577  * here.
1578  *
1579  * We have a potential double-lock case here, so even when
1580  * doing backlog processing we use the BH locking scheme.
1581  * This is because we cannot sleep with the original spinlock
1582  * held.
1583  */
1584 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1585 {
1586 	struct sock *rsk;
1587 #ifdef CONFIG_TCP_MD5SIG
1588 	/*
1589 	 * We really want to reject the packet as early as possible
1590 	 * if:
1591 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1592 	 *  o There is an MD5 option and we're not expecting one
1593 	 */
1594 	if (tcp_v4_inbound_md5_hash(sk, skb))
1595 		goto discard;
1596 #endif
1597 
1598 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1599 		sock_rps_save_rxhash(sk, skb);
1600 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1601 			rsk = sk;
1602 			goto reset;
1603 		}
1604 		return 0;
1605 	}
1606 
1607 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1608 		goto csum_err;
1609 
1610 	if (sk->sk_state == TCP_LISTEN) {
1611 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1612 		if (!nsk)
1613 			goto discard;
1614 
1615 		if (nsk != sk) {
1616 			sock_rps_save_rxhash(nsk, skb);
1617 			if (tcp_child_process(sk, nsk, skb)) {
1618 				rsk = nsk;
1619 				goto reset;
1620 			}
1621 			return 0;
1622 		}
1623 	} else
1624 		sock_rps_save_rxhash(sk, skb);
1625 
1626 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1627 		rsk = sk;
1628 		goto reset;
1629 	}
1630 	return 0;
1631 
1632 reset:
1633 	tcp_v4_send_reset(rsk, skb);
1634 discard:
1635 	kfree_skb(skb);
1636 	/* Be careful here. If this function gets more complicated and
1637 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1638 	 * might be destroyed here. This current version compiles correctly,
1639 	 * but you have been warned.
1640 	 */
1641 	return 0;
1642 
1643 csum_err:
1644 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1645 	goto discard;
1646 }
1647 EXPORT_SYMBOL(tcp_v4_do_rcv);
1648 
1649 /*
1650  *	From tcp_input.c
1651  */
1652 
/*
 * Receive entry point for IPv4 TCP segments.
 *
 * Validates the TCP header and checksum, fills in TCP_SKB_CB(skb), looks
 * up the owning socket and then either processes the segment right away
 * (tcp_v4_do_rcv(), possibly after tcp_prequeue() declined it) or queues
 * it on the socket backlog when the socket is owned by user context.
 * Segments for TIME_WAIT sockets are handled under do_time_wait.
 *
 * Returns the value of tcp_v4_do_rcv() when it was called, 0 otherwise.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	/* doff counts 32-bit words; it must cover at least the base header. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	/* Header pointers are fetched again after the pskb_may_pull() above. */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

	/* Also entered from do_time_wait with sk replaced by a listener. */
process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		/* Backlog refused the segment: unlock, count and drop it. */
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	/* bad_packet sits inside this if so that header/checksum failures
	 * detected earlier share the TCP_MIB_INERRS accounting; well-formed
	 * segments without a socket are answered with a reset instead.
	 */
	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* A SYN hit the timewait socket: if a listener exists,
		 * retire the timewait socket and process the SYN on the
		 * listener instead.
		 */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1796 
1797 struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1798 {
1799 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1800 	struct inet_sock *inet = inet_sk(sk);
1801 	struct inet_peer *peer;
1802 
1803 	if (!rt ||
1804 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1805 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
1806 		*release_it = true;
1807 	} else {
1808 		if (!rt->peer)
1809 			rt_bind_peer(rt, inet->inet_daddr, 1);
1810 		peer = rt->peer;
1811 		*release_it = false;
1812 	}
1813 
1814 	return peer;
1815 }
1816 EXPORT_SYMBOL(tcp_v4_get_peer);
1817 
1818 void *tcp_v4_tw_get_peer(struct sock *sk)
1819 {
1820 	const struct inet_timewait_sock *tw = inet_twsk(sk);
1821 
1822 	return inet_getpeer_v4(tw->tw_daddr, 1);
1823 }
1824 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1825 
1826 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1827 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1828 	.twsk_unique	= tcp_twsk_unique,
1829 	.twsk_destructor= tcp_twsk_destructor,
1830 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1831 };
1832 
1833 const struct inet_connection_sock_af_ops ipv4_specific = {
1834 	.queue_xmit	   = ip_queue_xmit,
1835 	.send_check	   = tcp_v4_send_check,
1836 	.rebuild_header	   = inet_sk_rebuild_header,
1837 	.conn_request	   = tcp_v4_conn_request,
1838 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1839 	.get_peer	   = tcp_v4_get_peer,
1840 	.net_header_len	   = sizeof(struct iphdr),
1841 	.setsockopt	   = ip_setsockopt,
1842 	.getsockopt	   = ip_getsockopt,
1843 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1844 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1845 	.bind_conflict	   = inet_csk_bind_conflict,
1846 #ifdef CONFIG_COMPAT
1847 	.compat_setsockopt = compat_ip_setsockopt,
1848 	.compat_getsockopt = compat_ip_getsockopt,
1849 #endif
1850 };
1851 EXPORT_SYMBOL(ipv4_specific);
1852 
1853 #ifdef CONFIG_TCP_MD5SIG
1854 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1855 	.md5_lookup		= tcp_v4_md5_lookup,
1856 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1857 	.md5_parse		= tcp_v4_parse_md5_keys,
1858 };
1859 #endif
1860 
1861 /* NOTE: A lot of things set to zero explicitly by call to
1862  *       sk_alloc() so need not be done here.
1863  */
1864 static int tcp_v4_init_sock(struct sock *sk)
1865 {
1866 	struct inet_connection_sock *icsk = inet_csk(sk);
1867 	struct tcp_sock *tp = tcp_sk(sk);
1868 
1869 	skb_queue_head_init(&tp->out_of_order_queue);
1870 	tcp_init_xmit_timers(sk);
1871 	tcp_prequeue_init(tp);
1872 
1873 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1874 	tp->mdev = TCP_TIMEOUT_INIT;
1875 
1876 	/* So many TCP implementations out there (incorrectly) count the
1877 	 * initial SYN frame in their delayed-ACK and congestion control
1878 	 * algorithms that we must have the following bandaid to talk
1879 	 * efficiently to them.  -DaveM
1880 	 */
1881 	tp->snd_cwnd = TCP_INIT_CWND;
1882 
1883 	/* See draft-stevens-tcpca-spec-01 for discussion of the
1884 	 * initialization of these values.
1885 	 */
1886 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1887 	tp->snd_cwnd_clamp = ~0;
1888 	tp->mss_cache = TCP_MSS_DEFAULT;
1889 
1890 	tp->reordering = sysctl_tcp_reordering;
1891 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1892 
1893 	sk->sk_state = TCP_CLOSE;
1894 
1895 	sk->sk_write_space = sk_stream_write_space;
1896 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1897 
1898 	icsk->icsk_af_ops = &ipv4_specific;
1899 	icsk->icsk_sync_mss = tcp_sync_mss;
1900 #ifdef CONFIG_TCP_MD5SIG
1901 	tp->af_specific = &tcp_sock_ipv4_specific;
1902 #endif
1903 
1904 	/* TCP Cookie Transactions */
1905 	if (sysctl_tcp_cookie_size > 0) {
1906 		/* Default, cookies without s_data_payload. */
1907 		tp->cookie_values =
1908 			kzalloc(sizeof(*tp->cookie_values),
1909 				sk->sk_allocation);
1910 		if (tp->cookie_values != NULL)
1911 			kref_init(&tp->cookie_values->kref);
1912 	}
1913 	/* Presumed zeroed, in order of appearance:
1914 	 *	cookie_in_always, cookie_out_never,
1915 	 *	s_data_constant, s_data_in, s_data_out
1916 	 */
1917 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1918 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1919 
1920 	local_bh_disable();
1921 	sock_update_memcg(sk);
1922 	sk_sockets_allocated_inc(sk);
1923 	local_bh_enable();
1924 
1925 	return 0;
1926 }
1927 
1928 void tcp_v4_destroy_sock(struct sock *sk)
1929 {
1930 	struct tcp_sock *tp = tcp_sk(sk);
1931 
1932 	tcp_clear_xmit_timers(sk);
1933 
1934 	tcp_cleanup_congestion_control(sk);
1935 
1936 	/* Cleanup up the write buffer. */
1937 	tcp_write_queue_purge(sk);
1938 
1939 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1940 	__skb_queue_purge(&tp->out_of_order_queue);
1941 
1942 #ifdef CONFIG_TCP_MD5SIG
1943 	/* Clean up the MD5 key list, if any */
1944 	if (tp->md5sig_info) {
1945 		tcp_clear_md5_list(sk);
1946 		kfree_rcu(tp->md5sig_info, rcu);
1947 		tp->md5sig_info = NULL;
1948 	}
1949 #endif
1950 
1951 #ifdef CONFIG_NET_DMA
1952 	/* Cleans up our sk_async_wait_queue */
1953 	__skb_queue_purge(&sk->sk_async_wait_queue);
1954 #endif
1955 
1956 	/* Clean prequeue, it must be empty really */
1957 	__skb_queue_purge(&tp->ucopy.prequeue);
1958 
1959 	/* Clean up a referenced TCP bind bucket. */
1960 	if (inet_csk(sk)->icsk_bind_hash)
1961 		inet_put_port(sk);
1962 
1963 	/*
1964 	 * If sendmsg cached page exists, toss it.
1965 	 */
1966 	if (sk->sk_sndmsg_page) {
1967 		__free_page(sk->sk_sndmsg_page);
1968 		sk->sk_sndmsg_page = NULL;
1969 	}
1970 
1971 	/* TCP Cookie Transactions */
1972 	if (tp->cookie_values != NULL) {
1973 		kref_put(&tp->cookie_values->kref,
1974 			 tcp_cookie_values_release);
1975 		tp->cookie_values = NULL;
1976 	}
1977 
1978 	sk_sockets_allocated_dec(sk);
1979 	sock_release_memcg(sk);
1980 }
1981 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1982 
1983 #ifdef CONFIG_PROC_FS
1984 /* Proc filesystem TCP sock list dumping. */
1985 
1986 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1987 {
1988 	return hlist_nulls_empty(head) ? NULL :
1989 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1990 }
1991 
1992 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1993 {
1994 	return !is_a_nulls(tw->tw_node.next) ?
1995 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1996 }
1997 
/*
 * Get the next listener socket following cur.  If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 *
 * While st->state is TCP_SEQ_STATE_OPENREQ, cur and the returned entry
 * are struct request_sock pointers taken from the SYN table of
 * st->syn_wait_sk rather than sockets.
 *
 * Locking as visible in this function: whenever a non-NULL entry is
 * returned the listening bucket lock (ilb->lock) is still held, and when
 * the entry is a request sock the listener's syn_wait_lock is read-held
 * as well.  Both are released here only when iteration moves on or ends.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	/* Fresh start: lock the current bucket and begin at its head. */
	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		/* Continue walking the SYN table of the current listener. */
		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
			/* Also the entry point from start_req below. */
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* SYN table exhausted: go back to iterating listeners. */
		sk	  = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		/* cur was a listener: visit its pending requests, if any,
		 * before moving to the next socket.
		 */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
			/* Reached with syn_wait_lock read-held. */
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	/* Bucket exhausted: unlock it and try the next one, if any. */
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
2084 
2085 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2086 {
2087 	struct tcp_iter_state *st = seq->private;
2088 	void *rc;
2089 
2090 	st->bucket = 0;
2091 	st->offset = 0;
2092 	rc = listening_get_next(seq, NULL);
2093 
2094 	while (rc && *pos) {
2095 		rc = listening_get_next(seq, rc);
2096 		--*pos;
2097 	}
2098 	return rc;
2099 }
2100 
2101 static inline int empty_bucket(struct tcp_iter_state *st)
2102 {
2103 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2104 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2105 }
2106 
2107 /*
2108  * Get first established socket starting from bucket given in st->bucket.
2109  * If st->bucket is zero, the very first socket in the hash is returned.
2110  */
2111 static void *established_get_first(struct seq_file *seq)
2112 {
2113 	struct tcp_iter_state *st = seq->private;
2114 	struct net *net = seq_file_net(seq);
2115 	void *rc = NULL;
2116 
2117 	st->offset = 0;
2118 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2119 		struct sock *sk;
2120 		struct hlist_nulls_node *node;
2121 		struct inet_timewait_sock *tw;
2122 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2123 
2124 		/* Lockless fast path for the common case of empty buckets */
2125 		if (empty_bucket(st))
2126 			continue;
2127 
2128 		spin_lock_bh(lock);
2129 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2130 			if (sk->sk_family != st->family ||
2131 			    !net_eq(sock_net(sk), net)) {
2132 				continue;
2133 			}
2134 			rc = sk;
2135 			goto out;
2136 		}
2137 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2138 		inet_twsk_for_each(tw, node,
2139 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2140 			if (tw->tw_family != st->family ||
2141 			    !net_eq(twsk_net(tw), net)) {
2142 				continue;
2143 			}
2144 			rc = tw;
2145 			goto out;
2146 		}
2147 		spin_unlock_bh(lock);
2148 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2149 	}
2150 out:
2151 	return rc;
2152 }
2153 
2154 static void *established_get_next(struct seq_file *seq, void *cur)
2155 {
2156 	struct sock *sk = cur;
2157 	struct inet_timewait_sock *tw;
2158 	struct hlist_nulls_node *node;
2159 	struct tcp_iter_state *st = seq->private;
2160 	struct net *net = seq_file_net(seq);
2161 
2162 	++st->num;
2163 	++st->offset;
2164 
2165 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2166 		tw = cur;
2167 		tw = tw_next(tw);
2168 get_tw:
2169 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2170 			tw = tw_next(tw);
2171 		}
2172 		if (tw) {
2173 			cur = tw;
2174 			goto out;
2175 		}
2176 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2177 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2178 
2179 		/* Look for next non empty bucket */
2180 		st->offset = 0;
2181 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2182 				empty_bucket(st))
2183 			;
2184 		if (st->bucket > tcp_hashinfo.ehash_mask)
2185 			return NULL;
2186 
2187 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2188 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2189 	} else
2190 		sk = sk_nulls_next(sk);
2191 
2192 	sk_nulls_for_each_from(sk, node) {
2193 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2194 			goto found;
2195 	}
2196 
2197 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2198 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2199 	goto get_tw;
2200 found:
2201 	cur = sk;
2202 out:
2203 	return cur;
2204 }
2205 
2206 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2207 {
2208 	struct tcp_iter_state *st = seq->private;
2209 	void *rc;
2210 
2211 	st->bucket = 0;
2212 	rc = established_get_first(seq);
2213 
2214 	while (rc && pos) {
2215 		rc = established_get_next(seq, rc);
2216 		--pos;
2217 	}
2218 	return rc;
2219 }
2220 
2221 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2222 {
2223 	void *rc;
2224 	struct tcp_iter_state *st = seq->private;
2225 
2226 	st->state = TCP_SEQ_STATE_LISTENING;
2227 	rc	  = listening_get_idx(seq, &pos);
2228 
2229 	if (!rc) {
2230 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2231 		rc	  = established_get_idx(seq, pos);
2232 	}
2233 
2234 	return rc;
2235 }
2236 
2237 static void *tcp_seek_last_pos(struct seq_file *seq)
2238 {
2239 	struct tcp_iter_state *st = seq->private;
2240 	int offset = st->offset;
2241 	int orig_num = st->num;
2242 	void *rc = NULL;
2243 
2244 	switch (st->state) {
2245 	case TCP_SEQ_STATE_OPENREQ:
2246 	case TCP_SEQ_STATE_LISTENING:
2247 		if (st->bucket >= INET_LHTABLE_SIZE)
2248 			break;
2249 		st->state = TCP_SEQ_STATE_LISTENING;
2250 		rc = listening_get_next(seq, NULL);
2251 		while (offset-- && rc)
2252 			rc = listening_get_next(seq, rc);
2253 		if (rc)
2254 			break;
2255 		st->bucket = 0;
2256 		/* Fallthrough */
2257 	case TCP_SEQ_STATE_ESTABLISHED:
2258 	case TCP_SEQ_STATE_TIME_WAIT:
2259 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2260 		if (st->bucket > tcp_hashinfo.ehash_mask)
2261 			break;
2262 		rc = established_get_first(seq);
2263 		while (offset-- && rc)
2264 			rc = established_get_next(seq, rc);
2265 	}
2266 
2267 	st->num = orig_num;
2268 
2269 	return rc;
2270 }
2271 
2272 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2273 {
2274 	struct tcp_iter_state *st = seq->private;
2275 	void *rc;
2276 
2277 	if (*pos && *pos == st->last_pos) {
2278 		rc = tcp_seek_last_pos(seq);
2279 		if (rc)
2280 			goto out;
2281 	}
2282 
2283 	st->state = TCP_SEQ_STATE_LISTENING;
2284 	st->num = 0;
2285 	st->bucket = 0;
2286 	st->offset = 0;
2287 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2288 
2289 out:
2290 	st->last_pos = *pos;
2291 	return rc;
2292 }
2293 
2294 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2295 {
2296 	struct tcp_iter_state *st = seq->private;
2297 	void *rc = NULL;
2298 
2299 	if (v == SEQ_START_TOKEN) {
2300 		rc = tcp_get_idx(seq, 0);
2301 		goto out;
2302 	}
2303 
2304 	switch (st->state) {
2305 	case TCP_SEQ_STATE_OPENREQ:
2306 	case TCP_SEQ_STATE_LISTENING:
2307 		rc = listening_get_next(seq, v);
2308 		if (!rc) {
2309 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2310 			st->bucket = 0;
2311 			st->offset = 0;
2312 			rc	  = established_get_first(seq);
2313 		}
2314 		break;
2315 	case TCP_SEQ_STATE_ESTABLISHED:
2316 	case TCP_SEQ_STATE_TIME_WAIT:
2317 		rc = established_get_next(seq, v);
2318 		break;
2319 	}
2320 out:
2321 	++*pos;
2322 	st->last_pos = *pos;
2323 	return rc;
2324 }
2325 
2326 static void tcp_seq_stop(struct seq_file *seq, void *v)
2327 {
2328 	struct tcp_iter_state *st = seq->private;
2329 
2330 	switch (st->state) {
2331 	case TCP_SEQ_STATE_OPENREQ:
2332 		if (v) {
2333 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2334 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2335 		}
2336 	case TCP_SEQ_STATE_LISTENING:
2337 		if (v != SEQ_START_TOKEN)
2338 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2339 		break;
2340 	case TCP_SEQ_STATE_TIME_WAIT:
2341 	case TCP_SEQ_STATE_ESTABLISHED:
2342 		if (v)
2343 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2344 		break;
2345 	}
2346 }
2347 
2348 int tcp_seq_open(struct inode *inode, struct file *file)
2349 {
2350 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2351 	struct tcp_iter_state *s;
2352 	int err;
2353 
2354 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2355 			  sizeof(struct tcp_iter_state));
2356 	if (err < 0)
2357 		return err;
2358 
2359 	s = ((struct seq_file *)file->private_data)->private;
2360 	s->family		= afinfo->family;
2361 	s->last_pos 		= 0;
2362 	return 0;
2363 }
2364 EXPORT_SYMBOL(tcp_seq_open);
2365 
2366 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2367 {
2368 	int rc = 0;
2369 	struct proc_dir_entry *p;
2370 
2371 	afinfo->seq_ops.start		= tcp_seq_start;
2372 	afinfo->seq_ops.next		= tcp_seq_next;
2373 	afinfo->seq_ops.stop		= tcp_seq_stop;
2374 
2375 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2376 			     afinfo->seq_fops, afinfo);
2377 	if (!p)
2378 		rc = -ENOMEM;
2379 	return rc;
2380 }
2381 EXPORT_SYMBOL(tcp_proc_register);
2382 
2383 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2384 {
2385 	proc_net_remove(net, afinfo->name);
2386 }
2387 EXPORT_SYMBOL(tcp_proc_unregister);
2388 
2389 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2390 			 struct seq_file *f, int i, int uid, int *len)
2391 {
2392 	const struct inet_request_sock *ireq = inet_rsk(req);
2393 	int ttd = req->expires - jiffies;
2394 
2395 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2396 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2397 		i,
2398 		ireq->loc_addr,
2399 		ntohs(inet_sk(sk)->inet_sport),
2400 		ireq->rmt_addr,
2401 		ntohs(ireq->rmt_port),
2402 		TCP_SYN_RECV,
2403 		0, 0, /* could print option size, but that is af dependent. */
2404 		1,    /* timers active (only the expire timer) */
2405 		jiffies_to_clock_t(ttd),
2406 		req->retrans,
2407 		uid,
2408 		0,  /* non standard timer */
2409 		0, /* open_requests have no inode */
2410 		atomic_read(&sk->sk_refcnt),
2411 		req,
2412 		len);
2413 }
2414 
2415 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2416 {
2417 	int timer_active;
2418 	unsigned long timer_expires;
2419 	const struct tcp_sock *tp = tcp_sk(sk);
2420 	const struct inet_connection_sock *icsk = inet_csk(sk);
2421 	const struct inet_sock *inet = inet_sk(sk);
2422 	__be32 dest = inet->inet_daddr;
2423 	__be32 src = inet->inet_rcv_saddr;
2424 	__u16 destp = ntohs(inet->inet_dport);
2425 	__u16 srcp = ntohs(inet->inet_sport);
2426 	int rx_queue;
2427 
2428 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2429 		timer_active	= 1;
2430 		timer_expires	= icsk->icsk_timeout;
2431 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2432 		timer_active	= 4;
2433 		timer_expires	= icsk->icsk_timeout;
2434 	} else if (timer_pending(&sk->sk_timer)) {
2435 		timer_active	= 2;
2436 		timer_expires	= sk->sk_timer.expires;
2437 	} else {
2438 		timer_active	= 0;
2439 		timer_expires = jiffies;
2440 	}
2441 
2442 	if (sk->sk_state == TCP_LISTEN)
2443 		rx_queue = sk->sk_ack_backlog;
2444 	else
2445 		/*
2446 		 * because we dont lock socket, we might find a transient negative value
2447 		 */
2448 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2449 
2450 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2451 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2452 		i, src, srcp, dest, destp, sk->sk_state,
2453 		tp->write_seq - tp->snd_una,
2454 		rx_queue,
2455 		timer_active,
2456 		jiffies_to_clock_t(timer_expires - jiffies),
2457 		icsk->icsk_retransmits,
2458 		sock_i_uid(sk),
2459 		icsk->icsk_probes_out,
2460 		sock_i_ino(sk),
2461 		atomic_read(&sk->sk_refcnt), sk,
2462 		jiffies_to_clock_t(icsk->icsk_rto),
2463 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2464 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2465 		tp->snd_cwnd,
2466 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2467 		len);
2468 }
2469 
2470 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2471 			       struct seq_file *f, int i, int *len)
2472 {
2473 	__be32 dest, src;
2474 	__u16 destp, srcp;
2475 	int ttd = tw->tw_ttd - jiffies;
2476 
2477 	if (ttd < 0)
2478 		ttd = 0;
2479 
2480 	dest  = tw->tw_daddr;
2481 	src   = tw->tw_rcv_saddr;
2482 	destp = ntohs(tw->tw_dport);
2483 	srcp  = ntohs(tw->tw_sport);
2484 
2485 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2486 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2487 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2488 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2489 		atomic_read(&tw->tw_refcnt), tw, len);
2490 }
2491 
2492 #define TMPSZ 150
2493 
2494 static int tcp4_seq_show(struct seq_file *seq, void *v)
2495 {
2496 	struct tcp_iter_state *st;
2497 	int len;
2498 
2499 	if (v == SEQ_START_TOKEN) {
2500 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2501 			   "  sl  local_address rem_address   st tx_queue "
2502 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2503 			   "inode");
2504 		goto out;
2505 	}
2506 	st = seq->private;
2507 
2508 	switch (st->state) {
2509 	case TCP_SEQ_STATE_LISTENING:
2510 	case TCP_SEQ_STATE_ESTABLISHED:
2511 		get_tcp4_sock(v, seq, st->num, &len);
2512 		break;
2513 	case TCP_SEQ_STATE_OPENREQ:
2514 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2515 		break;
2516 	case TCP_SEQ_STATE_TIME_WAIT:
2517 		get_timewait4_sock(v, seq, st->num, &len);
2518 		break;
2519 	}
2520 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2521 out:
2522 	return 0;
2523 }
2524 
2525 static const struct file_operations tcp_afinfo_seq_fops = {
2526 	.owner   = THIS_MODULE,
2527 	.open    = tcp_seq_open,
2528 	.read    = seq_read,
2529 	.llseek  = seq_lseek,
2530 	.release = seq_release_net
2531 };
2532 
2533 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2534 	.name		= "tcp",
2535 	.family		= AF_INET,
2536 	.seq_fops	= &tcp_afinfo_seq_fops,
2537 	.seq_ops	= {
2538 		.show		= tcp4_seq_show,
2539 	},
2540 };
2541 
2542 static int __net_init tcp4_proc_init_net(struct net *net)
2543 {
2544 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2545 }
2546 
2547 static void __net_exit tcp4_proc_exit_net(struct net *net)
2548 {
2549 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2550 }
2551 
2552 static struct pernet_operations tcp4_net_ops = {
2553 	.init = tcp4_proc_init_net,
2554 	.exit = tcp4_proc_exit_net,
2555 };
2556 
2557 int __init tcp4_proc_init(void)
2558 {
2559 	return register_pernet_subsys(&tcp4_net_ops);
2560 }
2561 
2562 void tcp4_proc_exit(void)
2563 {
2564 	unregister_pernet_subsys(&tcp4_net_ops);
2565 }
2566 #endif /* CONFIG_PROC_FS */
2567 
2568 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2569 {
2570 	const struct iphdr *iph = skb_gro_network_header(skb);
2571 
2572 	switch (skb->ip_summed) {
2573 	case CHECKSUM_COMPLETE:
2574 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2575 				  skb->csum)) {
2576 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2577 			break;
2578 		}
2579 
2580 		/* fall through */
2581 	case CHECKSUM_NONE:
2582 		NAPI_GRO_CB(skb)->flush = 1;
2583 		return NULL;
2584 	}
2585 
2586 	return tcp_gro_receive(head, skb);
2587 }
2588 
2589 int tcp4_gro_complete(struct sk_buff *skb)
2590 {
2591 	const struct iphdr *iph = ip_hdr(skb);
2592 	struct tcphdr *th = tcp_hdr(skb);
2593 
2594 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2595 				  iph->saddr, iph->daddr, 0);
2596 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2597 
2598 	return tcp_gro_complete(skb);
2599 }
2600 
2601 struct proto tcp_prot = {
2602 	.name			= "TCP",
2603 	.owner			= THIS_MODULE,
2604 	.close			= tcp_close,
2605 	.connect		= tcp_v4_connect,
2606 	.disconnect		= tcp_disconnect,
2607 	.accept			= inet_csk_accept,
2608 	.ioctl			= tcp_ioctl,
2609 	.init			= tcp_v4_init_sock,
2610 	.destroy		= tcp_v4_destroy_sock,
2611 	.shutdown		= tcp_shutdown,
2612 	.setsockopt		= tcp_setsockopt,
2613 	.getsockopt		= tcp_getsockopt,
2614 	.recvmsg		= tcp_recvmsg,
2615 	.sendmsg		= tcp_sendmsg,
2616 	.sendpage		= tcp_sendpage,
2617 	.backlog_rcv		= tcp_v4_do_rcv,
2618 	.hash			= inet_hash,
2619 	.unhash			= inet_unhash,
2620 	.get_port		= inet_csk_get_port,
2621 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2622 	.sockets_allocated	= &tcp_sockets_allocated,
2623 	.orphan_count		= &tcp_orphan_count,
2624 	.memory_allocated	= &tcp_memory_allocated,
2625 	.memory_pressure	= &tcp_memory_pressure,
2626 	.sysctl_wmem		= sysctl_tcp_wmem,
2627 	.sysctl_rmem		= sysctl_tcp_rmem,
2628 	.max_header		= MAX_TCP_HEADER,
2629 	.obj_size		= sizeof(struct tcp_sock),
2630 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2631 	.twsk_prot		= &tcp_timewait_sock_ops,
2632 	.rsk_prot		= &tcp_request_sock_ops,
2633 	.h.hashinfo		= &tcp_hashinfo,
2634 	.no_autobind		= true,
2635 #ifdef CONFIG_COMPAT
2636 	.compat_setsockopt	= compat_tcp_setsockopt,
2637 	.compat_getsockopt	= compat_tcp_getsockopt,
2638 #endif
2639 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2640 	.init_cgroup		= tcp_init_cgroup,
2641 	.destroy_cgroup		= tcp_destroy_cgroup,
2642 	.proto_cgroup		= tcp_proto_cgroup,
2643 #endif
2644 };
2645 EXPORT_SYMBOL(tcp_prot);
2646 
2647 static int __net_init tcp_sk_init(struct net *net)
2648 {
2649 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2650 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2651 }
2652 
2653 static void __net_exit tcp_sk_exit(struct net *net)
2654 {
2655 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2656 }
2657 
2658 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2659 {
2660 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2661 }
2662 
2663 static struct pernet_operations __net_initdata tcp_sk_ops = {
2664        .init	   = tcp_sk_init,
2665        .exit	   = tcp_sk_exit,
2666        .exit_batch = tcp_sk_exit_batch,
2667 };
2668 
2669 void __init tcp_v4_init(void)
2670 {
2671 	inet_hashinfo_init(&tcp_hashinfo);
2672 	if (register_pernet_subsys(&tcp_sk_ops))
2673 		panic("Failed to create the TCP control socket.\n");
2674 }
2675