xref: /linux/net/ipv4/tcp_ipv4.c (revision a508da6cc0093171833efb8376b00473f24221b9)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Implementation of the Transmission Control Protocol(TCP).
7  *
8  *		IPv4 specific functions
9  *
10  *
11  *		code split from:
12  *		linux/ipv4/tcp.c
13  *		linux/ipv4/tcp_input.c
14  *		linux/ipv4/tcp_output.c
15  *
16  *		See tcp.c for author information
17  *
18  *	This program is free software; you can redistribute it and/or
19  *      modify it under the terms of the GNU General Public License
20  *      as published by the Free Software Foundation; either version
21  *      2 of the License, or (at your option) any later version.
22  */
23 
24 /*
25  * Changes:
26  *		David S. Miller	:	New socket lookup architecture.
27  *					This code is dedicated to John Dyson.
28  *		David S. Miller :	Change semantics of established hash,
29  *					half is devoted to TIME_WAIT sockets
30  *					and the rest go in the other half.
31  *		Andi Kleen :		Add support for syncookies and fixed
32  *					some bugs: ip options weren't passed to
33  *					the TCP layer, missed a check for an
34  *					ACK bit.
35  *		Andi Kleen :		Implemented fast path mtu discovery.
36  *	     				Fixed many serious bugs in the
37  *					request_sock handling and moved
38  *					most of it into the af independent code.
39  *					Added tail drop and some other bugfixes.
40  *					Added new listen semantics.
41  *		Mike McLagan	:	Routing by source
42  *	Juan Jose Ciarlante:		ip_dynaddr bits
43  *		Andi Kleen:		various fixes.
44  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
45  *					coma.
46  *	Andi Kleen		:	Fix new listen.
47  *	Andi Kleen		:	Fix accept error reporting.
48  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
49  *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
50  *					a single port at the same time.
51  */
52 
53 #define pr_fmt(fmt) "TCP: " fmt
54 
55 #include <linux/bottom_half.h>
56 #include <linux/types.h>
57 #include <linux/fcntl.h>
58 #include <linux/module.h>
59 #include <linux/random.h>
60 #include <linux/cache.h>
61 #include <linux/jhash.h>
62 #include <linux/init.h>
63 #include <linux/times.h>
64 #include <linux/slab.h>
65 
66 #include <net/net_namespace.h>
67 #include <net/icmp.h>
68 #include <net/inet_hashtables.h>
69 #include <net/tcp.h>
70 #include <net/transp_v6.h>
71 #include <net/ipv6.h>
72 #include <net/inet_common.h>
73 #include <net/timewait_sock.h>
74 #include <net/xfrm.h>
75 #include <net/netdma.h>
76 #include <net/secure_seq.h>
77 #include <net/tcp_memcontrol.h>
78 
79 #include <linux/inet.h>
80 #include <linux/ipv6.h>
81 #include <linux/stddef.h>
82 #include <linux/proc_fs.h>
83 #include <linux/seq_file.h>
84 
85 #include <linux/crypto.h>
86 #include <linux/scatterlist.h>
87 
88 int sysctl_tcp_tw_reuse __read_mostly;
89 int sysctl_tcp_low_latency __read_mostly;
90 EXPORT_SYMBOL(sysctl_tcp_low_latency);
91 
92 
93 #ifdef CONFIG_TCP_MD5SIG
94 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96 #endif
97 
98 struct inet_hashinfo tcp_hashinfo;
99 EXPORT_SYMBOL(tcp_hashinfo);
100 
101 static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102 {
103 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 					  ip_hdr(skb)->saddr,
105 					  tcp_hdr(skb)->dest,
106 					  tcp_hdr(skb)->source);
107 }
108 
109 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110 {
111 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 	struct tcp_sock *tp = tcp_sk(sk);
113 
114 	/* With PAWS, it is safe from the viewpoint
115 	   of data integrity. Even without PAWS it is safe provided the
116 	   sequence spaces do not overlap, i.e. at data rates <= 80Mbit/sec.
117 
118 	   The idea is close to VJ's, except that the timestamp cache is
119 	   held per port pair rather than per host, and the TW bucket is
120 	   used as the state holder.
121 
122 	   If the TW bucket has already been destroyed we fall back to VJ's
123 	   scheme and use the initial timestamp retrieved from the peer table.
124 	 */
125 	if (tcptw->tw_ts_recent_stamp &&
126 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
127 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
128 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 		if (tp->write_seq == 0)
130 			tp->write_seq = 1;
131 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
132 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 		sock_hold(sktw);
134 		return 1;
135 	}
136 
137 	return 0;
138 }
139 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
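
/*
 * Illustrative sketch (not kernel code): the sysctl_tcp_tw_reuse knob
 * consulted above is exposed to userspace as net.ipv4.tcp_tw_reuse.  An
 * administrator who wants connect() to be allowed to reuse TIME-WAIT port
 * pairs (still subject to the timestamp check in tcp_twsk_unique()) would
 * typically enable it roughly like this:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/proc/sys/net/ipv4/tcp_tw_reuse", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */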
140 
141 static int tcp_repair_connect(struct sock *sk)
142 {
143 	tcp_connect_init(sk);
144 	tcp_finish_connect(sk, NULL);
145 
146 	return 0;
147 }
148 
149 /* This will initiate an outgoing connection. */
150 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151 {
152 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
153 	struct inet_sock *inet = inet_sk(sk);
154 	struct tcp_sock *tp = tcp_sk(sk);
155 	__be16 orig_sport, orig_dport;
156 	__be32 daddr, nexthop;
157 	struct flowi4 *fl4;
158 	struct rtable *rt;
159 	int err;
160 	struct ip_options_rcu *inet_opt;
161 
162 	if (addr_len < sizeof(struct sockaddr_in))
163 		return -EINVAL;
164 
165 	if (usin->sin_family != AF_INET)
166 		return -EAFNOSUPPORT;
167 
168 	nexthop = daddr = usin->sin_addr.s_addr;
169 	inet_opt = rcu_dereference_protected(inet->inet_opt,
170 					     sock_owned_by_user(sk));
171 	if (inet_opt && inet_opt->opt.srr) {
172 		if (!daddr)
173 			return -EINVAL;
174 		nexthop = inet_opt->opt.faddr;
175 	}
176 
177 	orig_sport = inet->inet_sport;
178 	orig_dport = usin->sin_port;
179 	fl4 = &inet->cork.fl.u.ip4;
180 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
181 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
182 			      IPPROTO_TCP,
183 			      orig_sport, orig_dport, sk, true);
184 	if (IS_ERR(rt)) {
185 		err = PTR_ERR(rt);
186 		if (err == -ENETUNREACH)
187 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188 		return err;
189 	}
190 
191 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
192 		ip_rt_put(rt);
193 		return -ENETUNREACH;
194 	}
195 
196 	if (!inet_opt || !inet_opt->opt.srr)
197 		daddr = fl4->daddr;
198 
199 	if (!inet->inet_saddr)
200 		inet->inet_saddr = fl4->saddr;
201 	inet->inet_rcv_saddr = inet->inet_saddr;
202 
203 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
204 		/* Reset inherited state */
205 		tp->rx_opt.ts_recent	   = 0;
206 		tp->rx_opt.ts_recent_stamp = 0;
207 		if (likely(!tp->repair))
208 			tp->write_seq	   = 0;
209 	}
210 
211 	if (tcp_death_row.sysctl_tw_recycle &&
212 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
213 		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
214 		/*
215 		 * VJ's idea. We save the last timestamp seen from
216 		 * the destination in the peer table when entering the
217 		 * TIME-WAIT state, and initialize rx_opt.ts_recent from it
218 		 * when trying a new connection.
219 		 */
220 		if (peer) {
221 			inet_peer_refcheck(peer);
222 			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
223 				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
224 				tp->rx_opt.ts_recent = peer->tcp_ts;
225 			}
226 		}
227 	}
228 
229 	inet->inet_dport = usin->sin_port;
230 	inet->inet_daddr = daddr;
231 
232 	inet_csk(sk)->icsk_ext_hdr_len = 0;
233 	if (inet_opt)
234 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
235 
236 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
237 
238 	/* Socket identity is still unknown (sport may be zero).
239 	 * However we set the state to SYN-SENT and, without releasing the
240 	 * socket lock, select a source port, enter ourselves into the hash
241 	 * tables and complete initialization afterwards.
242 	 */
243 	tcp_set_state(sk, TCP_SYN_SENT);
244 	err = inet_hash_connect(&tcp_death_row, sk);
245 	if (err)
246 		goto failure;
247 
248 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
249 			       inet->inet_sport, inet->inet_dport, sk);
250 	if (IS_ERR(rt)) {
251 		err = PTR_ERR(rt);
252 		rt = NULL;
253 		goto failure;
254 	}
255 	/* OK, now commit destination to socket.  */
256 	sk->sk_gso_type = SKB_GSO_TCPV4;
257 	sk_setup_caps(sk, &rt->dst);
258 
259 	if (!tp->write_seq && likely(!tp->repair))
260 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
261 							   inet->inet_daddr,
262 							   inet->inet_sport,
263 							   usin->sin_port);
264 
265 	inet->inet_id = tp->write_seq ^ jiffies;
266 
267 	if (likely(!tp->repair))
268 		err = tcp_connect(sk);
269 	else
270 		err = tcp_repair_connect(sk);
271 
272 	rt = NULL;
273 	if (err)
274 		goto failure;
275 
276 	return 0;
277 
278 failure:
279 	/*
280 	 * This unhashes the socket and releases the local port,
281 	 * if necessary.
282 	 */
283 	tcp_set_state(sk, TCP_CLOSE);
284 	ip_rt_put(rt);
285 	sk->sk_route_caps = 0;
286 	inet->inet_dport = 0;
287 	return err;
288 }
289 EXPORT_SYMBOL(tcp_v4_connect);
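
/*
 * Illustrative sketch (not kernel code): tcp_v4_connect() above is reached
 * from an ordinary userspace connect() on an AF_INET stream socket, roughly
 * as follows (error handling omitted; 192.0.2.1:80 is just a placeholder
 * destination):
 *
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(80),
 *	};
 *
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 */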
290 
291 /*
292  * This routine does path mtu discovery as defined in RFC1191.
293  */
294 static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
295 {
296 	struct dst_entry *dst;
297 	struct inet_sock *inet = inet_sk(sk);
298 
299 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
300 	 * sent out by Linux are always < 576 bytes so they should go through
301 	 * unfragmented).
302 	 */
303 	if (sk->sk_state == TCP_LISTEN)
304 		return;
305 
306 	/* We don't check in the dst entry if pmtu discovery is forbidden
307 	 * on this route. We just assume that no packet-too-big packets
308 	 * are sent back when pmtu discovery is not active.
309 	 * There is a small race when the user changes this flag in the
310 	 * route, but I think that's acceptable.
311 	 */
312 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
313 		return;
314 
315 	dst->ops->update_pmtu(dst, mtu);
316 
317 	/* Something is about to go wrong... Remember the soft error
318 	 * in case this connection is not able to recover.
319 	 */
320 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
321 		sk->sk_err_soft = EMSGSIZE;
322 
323 	mtu = dst_mtu(dst);
324 
325 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
326 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
327 		tcp_sync_mss(sk, mtu);
328 
329 		/* Resend the TCP packet because it's
330 		 * clear that the old packet has been
331 		 * dropped. This is the new "fast" path mtu
332 		 * discovery.
333 		 */
334 		tcp_simple_retransmit(sk);
335 	} /* else let the usual retransmit timer handle it */
336 }
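
/*
 * Illustrative sketch (not kernel code): the inet->pmtudisc value checked
 * above is controlled from userspace with the IP_MTU_DISCOVER socket option;
 * e.g. opting a socket out of path MTU discovery (fd is assumed to be an
 * existing TCP socket):
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	int val = IP_PMTUDISC_DONT;
 *	setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));
 */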
337 
338 /*
339  * This routine is called by the ICMP module when it gets some
340  * sort of error condition.  If err < 0 then the socket should
341  * be closed and the error returned to the user.  If err > 0
342  * it's just the icmp type << 8 | icmp code.  After adjustment
343  * header points to the first 8 bytes of the tcp header.  We need
344  * to find the appropriate port.
345  *
346  * The locking strategy used here is very "optimistic". When
347  * someone else accesses the socket the ICMP is just dropped
348  * and for some paths there is no check at all.
349  * A more general error queue to queue errors for later handling
350  * is probably better.
351  *
352  */
353 
354 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
355 {
356 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
357 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
358 	struct inet_connection_sock *icsk;
359 	struct tcp_sock *tp;
360 	struct inet_sock *inet;
361 	const int type = icmp_hdr(icmp_skb)->type;
362 	const int code = icmp_hdr(icmp_skb)->code;
363 	struct sock *sk;
364 	struct sk_buff *skb;
365 	__u32 seq;
366 	__u32 remaining;
367 	int err;
368 	struct net *net = dev_net(icmp_skb->dev);
369 
370 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
371 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
372 		return;
373 	}
374 
375 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
376 			iph->saddr, th->source, inet_iif(icmp_skb));
377 	if (!sk) {
378 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
379 		return;
380 	}
381 	if (sk->sk_state == TCP_TIME_WAIT) {
382 		inet_twsk_put(inet_twsk(sk));
383 		return;
384 	}
385 
386 	bh_lock_sock(sk);
387 	/* If too many ICMPs get dropped on busy
388 	 * servers this needs to be solved differently.
389 	 */
390 	if (sock_owned_by_user(sk))
391 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
392 
393 	if (sk->sk_state == TCP_CLOSE)
394 		goto out;
395 
396 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
397 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
398 		goto out;
399 	}
400 
401 	icsk = inet_csk(sk);
402 	tp = tcp_sk(sk);
403 	seq = ntohl(th->seq);
404 	if (sk->sk_state != TCP_LISTEN &&
405 	    !between(seq, tp->snd_una, tp->snd_nxt)) {
406 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
407 		goto out;
408 	}
409 
410 	switch (type) {
411 	case ICMP_SOURCE_QUENCH:
412 		/* Just silently ignore these. */
413 		goto out;
414 	case ICMP_PARAMETERPROB:
415 		err = EPROTO;
416 		break;
417 	case ICMP_DEST_UNREACH:
418 		if (code > NR_ICMP_UNREACH)
419 			goto out;
420 
421 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
422 			if (!sock_owned_by_user(sk))
423 				do_pmtu_discovery(sk, iph, info);
424 			goto out;
425 		}
426 
427 		err = icmp_err_convert[code].errno;
428 		/* check if icmp_skb allows revert of backoff
429 		 * (see draft-zimmermann-tcp-lcd) */
430 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
431 			break;
432 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
433 		    !icsk->icsk_backoff)
434 			break;
435 
436 		if (sock_owned_by_user(sk))
437 			break;
438 
439 		icsk->icsk_backoff--;
440 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
441 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
442 		tcp_bound_rto(sk);
443 
444 		skb = tcp_write_queue_head(sk);
445 		BUG_ON(!skb);
446 
447 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
448 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
449 
450 		if (remaining) {
451 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
452 						  remaining, TCP_RTO_MAX);
453 		} else {
454 			/* RTO revert clocked out retransmission.
455 			 * Will retransmit now */
456 			tcp_retransmit_timer(sk);
457 		}
458 
459 		break;
460 	case ICMP_TIME_EXCEEDED:
461 		err = EHOSTUNREACH;
462 		break;
463 	default:
464 		goto out;
465 	}
466 
467 	switch (sk->sk_state) {
468 		struct request_sock *req, **prev;
469 	case TCP_LISTEN:
470 		if (sock_owned_by_user(sk))
471 			goto out;
472 
473 		req = inet_csk_search_req(sk, &prev, th->dest,
474 					  iph->daddr, iph->saddr);
475 		if (!req)
476 			goto out;
477 
478 		/* ICMPs are not backlogged, hence we cannot get
479 		   an established socket here.
480 		 */
481 		WARN_ON(req->sk);
482 
483 		if (seq != tcp_rsk(req)->snt_isn) {
484 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
485 			goto out;
486 		}
487 
488 		/*
489 		 * Still in SYN_RECV, just remove it silently.
490 		 * There is no good way to pass the error to the newly
491 		 * created socket, and POSIX does not want network
492 		 * errors returned from accept().
493 		 */
494 		inet_csk_reqsk_queue_drop(sk, req, prev);
495 		goto out;
496 
497 	case TCP_SYN_SENT:
498 	case TCP_SYN_RECV:  /* Cannot happen normally.
499 			       It can, e.g., if SYNs crossed.
500 			     */
501 		if (!sock_owned_by_user(sk)) {
502 			sk->sk_err = err;
503 
504 			sk->sk_error_report(sk);
505 
506 			tcp_done(sk);
507 		} else {
508 			sk->sk_err_soft = err;
509 		}
510 		goto out;
511 	}
512 
513 	/* If we've already connected we will keep trying
514 	 * until we time out, or the user gives up.
515 	 *
516 	 * rfc1122 4.2.3.9 allows us to treat as hard errors
517 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
518 	 * but it is obsoleted by pmtu discovery).
519 	 *
520 	 * Note that in the modern internet, where routing is unreliable
521 	 * and broken firewalls sit in every dark corner sending random
522 	 * errors ordered by their masters, even these two messages have
523 	 * lost their original sense (even Linux sends invalid PORT_UNREACHs).
524 	 *
525 	 * Now we are in compliance with RFCs.
526 	 *							--ANK (980905)
527 	 */
528 
529 	inet = inet_sk(sk);
530 	if (!sock_owned_by_user(sk) && inet->recverr) {
531 		sk->sk_err = err;
532 		sk->sk_error_report(sk);
533 	} else	{ /* Only an error on timeout */
534 		sk->sk_err_soft = err;
535 	}
536 
537 out:
538 	bh_unlock_sock(sk);
539 	sock_put(sk);
540 }
541 
542 static void __tcp_v4_send_check(struct sk_buff *skb,
543 				__be32 saddr, __be32 daddr)
544 {
545 	struct tcphdr *th = tcp_hdr(skb);
546 
547 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
548 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
549 		skb->csum_start = skb_transport_header(skb) - skb->head;
550 		skb->csum_offset = offsetof(struct tcphdr, check);
551 	} else {
552 		th->check = tcp_v4_check(skb->len, saddr, daddr,
553 					 csum_partial(th,
554 						      th->doff << 2,
555 						      skb->csum));
556 	}
557 }
558 
559 /* This routine computes an IPv4 TCP checksum. */
560 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
561 {
562 	const struct inet_sock *inet = inet_sk(sk);
563 
564 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
565 }
566 EXPORT_SYMBOL(tcp_v4_send_check);
567 
568 int tcp_v4_gso_send_check(struct sk_buff *skb)
569 {
570 	const struct iphdr *iph;
571 	struct tcphdr *th;
572 
573 	if (!pskb_may_pull(skb, sizeof(*th)))
574 		return -EINVAL;
575 
576 	iph = ip_hdr(skb);
577 	th = tcp_hdr(skb);
578 
579 	th->check = 0;
580 	skb->ip_summed = CHECKSUM_PARTIAL;
581 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
582 	return 0;
583 }
584 
585 /*
586  *	This routine will send an RST to the other tcp.
587  *
588  *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
589  *		      for the reset?
590  *	Answer: if a packet caused the RST, it is not for a socket
591  *		existing in our system; if it is matched to a socket,
592  *		it is just a duplicate segment or a bug in the other side's TCP.
593  *		So we build the reply based only on the parameters that
594  *		arrived with the segment.
595  *	Exception: precedence violation. We do not implement it in any case.
596  */
597 
598 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
599 {
600 	const struct tcphdr *th = tcp_hdr(skb);
601 	struct {
602 		struct tcphdr th;
603 #ifdef CONFIG_TCP_MD5SIG
604 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
605 #endif
606 	} rep;
607 	struct ip_reply_arg arg;
608 #ifdef CONFIG_TCP_MD5SIG
609 	struct tcp_md5sig_key *key;
610 	const __u8 *hash_location = NULL;
611 	unsigned char newhash[16];
612 	int genhash;
613 	struct sock *sk1 = NULL;
614 #endif
615 	struct net *net;
616 
617 	/* Never send a reset in response to a reset. */
618 	if (th->rst)
619 		return;
620 
621 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
622 		return;
623 
624 	/* Swap the send and the receive. */
625 	memset(&rep, 0, sizeof(rep));
626 	rep.th.dest   = th->source;
627 	rep.th.source = th->dest;
628 	rep.th.doff   = sizeof(struct tcphdr) / 4;
629 	rep.th.rst    = 1;
630 
631 	if (th->ack) {
632 		rep.th.seq = th->ack_seq;
633 	} else {
634 		rep.th.ack = 1;
635 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
636 				       skb->len - (th->doff << 2));
637 	}
638 
639 	memset(&arg, 0, sizeof(arg));
640 	arg.iov[0].iov_base = (unsigned char *)&rep;
641 	arg.iov[0].iov_len  = sizeof(rep.th);
642 
643 #ifdef CONFIG_TCP_MD5SIG
644 	hash_location = tcp_parse_md5sig_option(th);
645 	if (!sk && hash_location) {
646 		/*
647 		 * The active side is gone. Try to find the listening socket
648 		 * via the source port, and then the md5 key via that socket.
649 		 * We do not loosen security here:
650 		 * the incoming packet is checked against the md5 hash of the
651 		 * key we find, and no RST is generated if the hash doesn't match.
652 		 */
653 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
654 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
655 					     ntohs(th->source), inet_iif(skb));
656 		/* don't send an rst if we can't find a key */
657 		if (!sk1)
658 			return;
659 		rcu_read_lock();
660 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
661 					&ip_hdr(skb)->saddr, AF_INET);
662 		if (!key)
663 			goto release_sk1;
664 
665 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
666 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
667 			goto release_sk1;
668 	} else {
669 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
670 					     &ip_hdr(skb)->saddr,
671 					     AF_INET) : NULL;
672 	}
673 
674 	if (key) {
675 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
676 				   (TCPOPT_NOP << 16) |
677 				   (TCPOPT_MD5SIG << 8) |
678 				   TCPOLEN_MD5SIG);
679 		/* Update length and the length the header thinks exists */
680 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
681 		rep.th.doff = arg.iov[0].iov_len / 4;
682 
683 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
684 				     key, ip_hdr(skb)->saddr,
685 				     ip_hdr(skb)->daddr, &rep.th);
686 	}
687 #endif
688 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
689 				      ip_hdr(skb)->saddr, /* XXX */
690 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
691 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
692 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
693 	/* When the socket is gone, all binding information is lost and
694 	 * routing might fail. Use iif as the oif to
695 	 * make sure we can deliver the reply.
696 	 */
697 	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
698 
699 	net = dev_net(skb_dst(skb)->dev);
700 	arg.tos = ip_hdr(skb)->tos;
701 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
702 		      &arg, arg.iov[0].iov_len);
703 
704 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
705 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
706 
707 #ifdef CONFIG_TCP_MD5SIG
708 release_sk1:
709 	if (sk1) {
710 		rcu_read_unlock();
711 		sock_put(sk1);
712 	}
713 #endif
714 }
715 
716 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
717    outside of socket context, is certainly ugly. What can I do?
718  */
719 
720 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
721 			    u32 win, u32 ts, int oif,
722 			    struct tcp_md5sig_key *key,
723 			    int reply_flags, u8 tos)
724 {
725 	const struct tcphdr *th = tcp_hdr(skb);
726 	struct {
727 		struct tcphdr th;
728 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
729 #ifdef CONFIG_TCP_MD5SIG
730 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
731 #endif
732 			];
733 	} rep;
734 	struct ip_reply_arg arg;
735 	struct net *net = dev_net(skb_dst(skb)->dev);
736 
737 	memset(&rep.th, 0, sizeof(struct tcphdr));
738 	memset(&arg, 0, sizeof(arg));
739 
740 	arg.iov[0].iov_base = (unsigned char *)&rep;
741 	arg.iov[0].iov_len  = sizeof(rep.th);
742 	if (ts) {
743 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
744 				   (TCPOPT_TIMESTAMP << 8) |
745 				   TCPOLEN_TIMESTAMP);
746 		rep.opt[1] = htonl(tcp_time_stamp);
747 		rep.opt[2] = htonl(ts);
748 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
749 	}
750 
751 	/* Swap the send and the receive. */
752 	rep.th.dest    = th->source;
753 	rep.th.source  = th->dest;
754 	rep.th.doff    = arg.iov[0].iov_len / 4;
755 	rep.th.seq     = htonl(seq);
756 	rep.th.ack_seq = htonl(ack);
757 	rep.th.ack     = 1;
758 	rep.th.window  = htons(win);
759 
760 #ifdef CONFIG_TCP_MD5SIG
761 	if (key) {
762 		int offset = (ts) ? 3 : 0;
763 
764 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
765 					  (TCPOPT_NOP << 16) |
766 					  (TCPOPT_MD5SIG << 8) |
767 					  TCPOLEN_MD5SIG);
768 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
769 		rep.th.doff = arg.iov[0].iov_len/4;
770 
771 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
772 				    key, ip_hdr(skb)->saddr,
773 				    ip_hdr(skb)->daddr, &rep.th);
774 	}
775 #endif
776 	arg.flags = reply_flags;
777 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
778 				      ip_hdr(skb)->saddr, /* XXX */
779 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
780 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
781 	if (oif)
782 		arg.bound_dev_if = oif;
783 	arg.tos = tos;
784 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
785 		      &arg, arg.iov[0].iov_len);
786 
787 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
788 }
789 
790 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
791 {
792 	struct inet_timewait_sock *tw = inet_twsk(sk);
793 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
794 
795 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
796 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
797 			tcptw->tw_ts_recent,
798 			tw->tw_bound_dev_if,
799 			tcp_twsk_md5_key(tcptw),
800 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
801 			tw->tw_tos
802 			);
803 
804 	inet_twsk_put(tw);
805 }
806 
807 static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
808 				  struct request_sock *req)
809 {
810 	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
811 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
812 			req->ts_recent,
813 			0,
814 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
815 					  AF_INET),
816 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
817 			ip_hdr(skb)->tos);
818 }
819 
820 /*
821  *	Send a SYN-ACK after having received a SYN.
822  *	This still operates on a request_sock only, not on a big
823  *	socket.
824  */
825 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
826 			      struct request_sock *req,
827 			      struct request_values *rvp)
828 {
829 	const struct inet_request_sock *ireq = inet_rsk(req);
830 	struct flowi4 fl4;
831 	int err = -1;
832 	struct sk_buff *skb;
833 
834 	/* First, grab a route. */
835 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
836 		return -1;
837 
838 	skb = tcp_make_synack(sk, dst, req, rvp);
839 
840 	if (skb) {
841 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
842 
843 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
844 					    ireq->rmt_addr,
845 					    ireq->opt);
846 		err = net_xmit_eval(err);
847 	}
848 
849 	dst_release(dst);
850 	return err;
851 }
852 
853 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
854 			      struct request_values *rvp)
855 {
856 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
857 	return tcp_v4_send_synack(sk, NULL, req, rvp);
858 }
859 
860 /*
861  *	IPv4 request_sock destructor.
862  */
863 static void tcp_v4_reqsk_destructor(struct request_sock *req)
864 {
865 	kfree(inet_rsk(req)->opt);
866 }
867 
868 /*
869  * Return true if a syncookie should be sent
870  */
871 bool tcp_syn_flood_action(struct sock *sk,
872 			 const struct sk_buff *skb,
873 			 const char *proto)
874 {
875 	const char *msg = "Dropping request";
876 	bool want_cookie = false;
877 	struct listen_sock *lopt;
878 
879 
880 
881 #ifdef CONFIG_SYN_COOKIES
882 	if (sysctl_tcp_syncookies) {
883 		msg = "Sending cookies";
884 		want_cookie = true;
885 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
886 	} else
887 #endif
888 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
889 
890 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
891 	if (!lopt->synflood_warned) {
892 		lopt->synflood_warned = 1;
893 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
894 			proto, ntohs(tcp_hdr(skb)->dest), msg);
895 	}
896 	return want_cookie;
897 }
898 EXPORT_SYMBOL(tcp_syn_flood_action);
899 
900 /*
901  * Save and compile IPv4 options into the request_sock if needed.
902  */
903 static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
904 						  struct sk_buff *skb)
905 {
906 	const struct ip_options *opt = &(IPCB(skb)->opt);
907 	struct ip_options_rcu *dopt = NULL;
908 
909 	if (opt && opt->optlen) {
910 		int opt_size = sizeof(*dopt) + opt->optlen;
911 
912 		dopt = kmalloc(opt_size, GFP_ATOMIC);
913 		if (dopt) {
914 			if (ip_options_echo(&dopt->opt, skb)) {
915 				kfree(dopt);
916 				dopt = NULL;
917 			}
918 		}
919 	}
920 	return dopt;
921 }
922 
923 #ifdef CONFIG_TCP_MD5SIG
924 /*
925  * RFC2385 MD5 checksumming requires a mapping of
926  * IP address->MD5 Key.
927  * We need to maintain these in the sk structure.
928  */
929 
930 /* Find the Key structure for an address.  */
931 struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
932 					 const union tcp_md5_addr *addr,
933 					 int family)
934 {
935 	struct tcp_sock *tp = tcp_sk(sk);
936 	struct tcp_md5sig_key *key;
937 	struct hlist_node *pos;
938 	unsigned int size = sizeof(struct in_addr);
939 	struct tcp_md5sig_info *md5sig;
940 
941 	/* caller either holds rcu_read_lock() or socket lock */
942 	md5sig = rcu_dereference_check(tp->md5sig_info,
943 				       sock_owned_by_user(sk) ||
944 				       lockdep_is_held(&sk->sk_lock.slock));
945 	if (!md5sig)
946 		return NULL;
947 #if IS_ENABLED(CONFIG_IPV6)
948 	if (family == AF_INET6)
949 		size = sizeof(struct in6_addr);
950 #endif
951 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
952 		if (key->family != family)
953 			continue;
954 		if (!memcmp(&key->addr, addr, size))
955 			return key;
956 	}
957 	return NULL;
958 }
959 EXPORT_SYMBOL(tcp_md5_do_lookup);
960 
961 struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
962 					 struct sock *addr_sk)
963 {
964 	union tcp_md5_addr *addr;
965 
966 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
967 	return tcp_md5_do_lookup(sk, addr, AF_INET);
968 }
969 EXPORT_SYMBOL(tcp_v4_md5_lookup);
970 
971 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
972 						      struct request_sock *req)
973 {
974 	union tcp_md5_addr *addr;
975 
976 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
977 	return tcp_md5_do_lookup(sk, addr, AF_INET);
978 }
979 
980 /* This can be called on a newly created socket, from other files */
981 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
982 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
983 {
984 	/* Add Key to the list */
985 	struct tcp_md5sig_key *key;
986 	struct tcp_sock *tp = tcp_sk(sk);
987 	struct tcp_md5sig_info *md5sig;
988 
989 	key = tcp_md5_do_lookup(sk, addr, family);
990 	if (key) {
991 		/* Pre-existing entry - just update that one. */
992 		memcpy(key->key, newkey, newkeylen);
993 		key->keylen = newkeylen;
994 		return 0;
995 	}
996 
997 	md5sig = rcu_dereference_protected(tp->md5sig_info,
998 					   sock_owned_by_user(sk));
999 	if (!md5sig) {
1000 		md5sig = kmalloc(sizeof(*md5sig), gfp);
1001 		if (!md5sig)
1002 			return -ENOMEM;
1003 
1004 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1005 		INIT_HLIST_HEAD(&md5sig->head);
1006 		rcu_assign_pointer(tp->md5sig_info, md5sig);
1007 	}
1008 
1009 	key = sock_kmalloc(sk, sizeof(*key), gfp);
1010 	if (!key)
1011 		return -ENOMEM;
1012 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1013 		sock_kfree_s(sk, key, sizeof(*key));
1014 		return -ENOMEM;
1015 	}
1016 
1017 	memcpy(key->key, newkey, newkeylen);
1018 	key->keylen = newkeylen;
1019 	key->family = family;
1020 	memcpy(&key->addr, addr,
1021 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1022 				      sizeof(struct in_addr));
1023 	hlist_add_head_rcu(&key->node, &md5sig->head);
1024 	return 0;
1025 }
1026 EXPORT_SYMBOL(tcp_md5_do_add);
1027 
1028 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1029 {
1030 	struct tcp_sock *tp = tcp_sk(sk);
1031 	struct tcp_md5sig_key *key;
1032 	struct tcp_md5sig_info *md5sig;
1033 
1034 	key = tcp_md5_do_lookup(sk, addr, family);
1035 	if (!key)
1036 		return -ENOENT;
1037 	hlist_del_rcu(&key->node);
1038 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1039 	kfree_rcu(key, rcu);
1040 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1041 					   sock_owned_by_user(sk));
1042 	if (hlist_empty(&md5sig->head))
1043 		tcp_free_md5sig_pool();
1044 	return 0;
1045 }
1046 EXPORT_SYMBOL(tcp_md5_do_del);
1047 
1048 void tcp_clear_md5_list(struct sock *sk)
1049 {
1050 	struct tcp_sock *tp = tcp_sk(sk);
1051 	struct tcp_md5sig_key *key;
1052 	struct hlist_node *pos, *n;
1053 	struct tcp_md5sig_info *md5sig;
1054 
1055 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1056 
1057 	if (!hlist_empty(&md5sig->head))
1058 		tcp_free_md5sig_pool();
1059 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1060 		hlist_del_rcu(&key->node);
1061 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1062 		kfree_rcu(key, rcu);
1063 	}
1064 }
1065 
1066 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1067 				 int optlen)
1068 {
1069 	struct tcp_md5sig cmd;
1070 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1071 
1072 	if (optlen < sizeof(cmd))
1073 		return -EINVAL;
1074 
1075 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1076 		return -EFAULT;
1077 
1078 	if (sin->sin_family != AF_INET)
1079 		return -EINVAL;
1080 
1081 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1082 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1083 				      AF_INET);
1084 
1085 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1086 		return -EINVAL;
1087 
1088 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1089 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1090 			      GFP_KERNEL);
1091 }
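
/*
 * Illustrative sketch (not kernel code): the TCP_MD5SIG option handled by
 * tcp_v4_parse_md5_keys() above is set from userspace roughly as follows,
 * assuming struct tcp_md5sig is available from <netinet/tcp.h> (older
 * toolchains get it from the kernel UAPI <linux/tcp.h>), that fd is a TCP
 * socket, and that 192.0.2.1 / "secret" are placeholder peer and key:
 *
 *	#include <string.h>
 *	#include <arpa/inet.h>
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <sys/socket.h>
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in peer = { .sin_family = AF_INET };
 *
 *	inet_pton(AF_INET, "192.0.2.1", &peer.sin_addr);
 *	memset(&md5, 0, sizeof(md5));
 *	memcpy(&md5.tcpm_addr, &peer, sizeof(peer));
 *	md5.tcpm_keylen = strlen("secret");
 *	memcpy(md5.tcpm_key, "secret", md5.tcpm_keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing a zero tcpm_keylen instead deletes the key, which is the
 * tcp_md5_do_del() path above.
 */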
1092 
1093 static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1094 					__be32 daddr, __be32 saddr, int nbytes)
1095 {
1096 	struct tcp4_pseudohdr *bp;
1097 	struct scatterlist sg;
1098 
1099 	bp = &hp->md5_blk.ip4;
1100 
1101 	/*
1102 	 * 1. the TCP pseudo-header (in the order: source IP address,
1103 	 * destination IP address, zero-padded protocol number, and
1104 	 * segment length)
1105 	 */
1106 	bp->saddr = saddr;
1107 	bp->daddr = daddr;
1108 	bp->pad = 0;
1109 	bp->protocol = IPPROTO_TCP;
1110 	bp->len = cpu_to_be16(nbytes);
1111 
1112 	sg_init_one(&sg, bp, sizeof(*bp));
1113 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1114 }
1115 
1116 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1117 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
1118 {
1119 	struct tcp_md5sig_pool *hp;
1120 	struct hash_desc *desc;
1121 
1122 	hp = tcp_get_md5sig_pool();
1123 	if (!hp)
1124 		goto clear_hash_noput;
1125 	desc = &hp->md5_desc;
1126 
1127 	if (crypto_hash_init(desc))
1128 		goto clear_hash;
1129 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1130 		goto clear_hash;
1131 	if (tcp_md5_hash_header(hp, th))
1132 		goto clear_hash;
1133 	if (tcp_md5_hash_key(hp, key))
1134 		goto clear_hash;
1135 	if (crypto_hash_final(desc, md5_hash))
1136 		goto clear_hash;
1137 
1138 	tcp_put_md5sig_pool();
1139 	return 0;
1140 
1141 clear_hash:
1142 	tcp_put_md5sig_pool();
1143 clear_hash_noput:
1144 	memset(md5_hash, 0, 16);
1145 	return 1;
1146 }
1147 
1148 int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1149 			const struct sock *sk, const struct request_sock *req,
1150 			const struct sk_buff *skb)
1151 {
1152 	struct tcp_md5sig_pool *hp;
1153 	struct hash_desc *desc;
1154 	const struct tcphdr *th = tcp_hdr(skb);
1155 	__be32 saddr, daddr;
1156 
1157 	if (sk) {
1158 		saddr = inet_sk(sk)->inet_saddr;
1159 		daddr = inet_sk(sk)->inet_daddr;
1160 	} else if (req) {
1161 		saddr = inet_rsk(req)->loc_addr;
1162 		daddr = inet_rsk(req)->rmt_addr;
1163 	} else {
1164 		const struct iphdr *iph = ip_hdr(skb);
1165 		saddr = iph->saddr;
1166 		daddr = iph->daddr;
1167 	}
1168 
1169 	hp = tcp_get_md5sig_pool();
1170 	if (!hp)
1171 		goto clear_hash_noput;
1172 	desc = &hp->md5_desc;
1173 
1174 	if (crypto_hash_init(desc))
1175 		goto clear_hash;
1176 
1177 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1178 		goto clear_hash;
1179 	if (tcp_md5_hash_header(hp, th))
1180 		goto clear_hash;
1181 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1182 		goto clear_hash;
1183 	if (tcp_md5_hash_key(hp, key))
1184 		goto clear_hash;
1185 	if (crypto_hash_final(desc, md5_hash))
1186 		goto clear_hash;
1187 
1188 	tcp_put_md5sig_pool();
1189 	return 0;
1190 
1191 clear_hash:
1192 	tcp_put_md5sig_pool();
1193 clear_hash_noput:
1194 	memset(md5_hash, 0, 16);
1195 	return 1;
1196 }
1197 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1198 
1199 static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1200 {
1201 	/*
1202 	 * This gets called for each TCP segment that arrives
1203 	 * so we want to be efficient.
1204 	 * We have 3 drop cases:
1205 	 * o No MD5 hash and one expected.
1206 	 * o MD5 hash and we're not expecting one.
1207 	 * o MD5 hash and it's wrong.
1208 	 */
1209 	const __u8 *hash_location = NULL;
1210 	struct tcp_md5sig_key *hash_expected;
1211 	const struct iphdr *iph = ip_hdr(skb);
1212 	const struct tcphdr *th = tcp_hdr(skb);
1213 	int genhash;
1214 	unsigned char newhash[16];
1215 
1216 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1217 					  AF_INET);
1218 	hash_location = tcp_parse_md5sig_option(th);
1219 
1220 	/* We've parsed the options - do we have a hash? */
1221 	if (!hash_expected && !hash_location)
1222 		return false;
1223 
1224 	if (hash_expected && !hash_location) {
1225 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1226 		return true;
1227 	}
1228 
1229 	if (!hash_expected && hash_location) {
1230 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1231 		return true;
1232 	}
1233 
1234 	/* Okay, so this is hash_expected and hash_location -
1235 	 * so we need to calculate the checksum.
1236 	 */
1237 	genhash = tcp_v4_md5_hash_skb(newhash,
1238 				      hash_expected,
1239 				      NULL, NULL, skb);
1240 
1241 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1242 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1243 				     &iph->saddr, ntohs(th->source),
1244 				     &iph->daddr, ntohs(th->dest),
1245 				     genhash ? " tcp_v4_calc_md5_hash failed"
1246 				     : "");
1247 		return true;
1248 	}
1249 	return false;
1250 }
1251 
1252 #endif
1253 
1254 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1255 	.family		=	PF_INET,
1256 	.obj_size	=	sizeof(struct tcp_request_sock),
1257 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1258 	.send_ack	=	tcp_v4_reqsk_send_ack,
1259 	.destructor	=	tcp_v4_reqsk_destructor,
1260 	.send_reset	=	tcp_v4_send_reset,
1261 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1262 };
1263 
1264 #ifdef CONFIG_TCP_MD5SIG
1265 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1266 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1267 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1268 };
1269 #endif
1270 
1271 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1272 {
1273 	struct tcp_extend_values tmp_ext;
1274 	struct tcp_options_received tmp_opt;
1275 	const u8 *hash_location;
1276 	struct request_sock *req;
1277 	struct inet_request_sock *ireq;
1278 	struct tcp_sock *tp = tcp_sk(sk);
1279 	struct dst_entry *dst = NULL;
1280 	__be32 saddr = ip_hdr(skb)->saddr;
1281 	__be32 daddr = ip_hdr(skb)->daddr;
1282 	__u32 isn = TCP_SKB_CB(skb)->when;
1283 	bool want_cookie = false;
1284 
1285 	/* Never answer SYNs sent to broadcast or multicast addresses */
1286 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1287 		goto drop;
1288 
1289 	/* TW buckets are converted to open requests without
1290 	 * limitation: they conserve resources and the peer is
1291 	 * evidently a real one.
1292 	 */
1293 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1294 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1295 		if (!want_cookie)
1296 			goto drop;
1297 	}
1298 
1299 	/* The accept backlog is full. If we have already queued enough
1300 	 * warm entries in the syn queue, drop the request. That is better than
1301 	 * clogging the syn queue with openreqs with exponentially increasing
1302 	 * timeouts.
1303 	 */
1304 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1305 		goto drop;
1306 
1307 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1308 	if (!req)
1309 		goto drop;
1310 
1311 #ifdef CONFIG_TCP_MD5SIG
1312 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1313 #endif
1314 
1315 	tcp_clear_options(&tmp_opt);
1316 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1317 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1318 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1319 
1320 	if (tmp_opt.cookie_plus > 0 &&
1321 	    tmp_opt.saw_tstamp &&
1322 	    !tp->rx_opt.cookie_out_never &&
1323 	    (sysctl_tcp_cookie_size > 0 ||
1324 	     (tp->cookie_values != NULL &&
1325 	      tp->cookie_values->cookie_desired > 0))) {
1326 		u8 *c;
1327 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1328 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1329 
1330 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1331 			goto drop_and_release;
1332 
1333 		/* Secret recipe starts with IP addresses */
1334 		*mess++ ^= (__force u32)daddr;
1335 		*mess++ ^= (__force u32)saddr;
1336 
1337 		/* plus variable length Initiator Cookie */
1338 		c = (u8 *)mess;
1339 		while (l-- > 0)
1340 			*c++ ^= *hash_location++;
1341 
1342 		want_cookie = false;	/* not our kind of cookie */
1343 		tmp_ext.cookie_out_never = 0; /* false */
1344 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1345 	} else if (!tp->rx_opt.cookie_in_always) {
1346 		/* redundant indications, but ensure initialization. */
1347 		tmp_ext.cookie_out_never = 1; /* true */
1348 		tmp_ext.cookie_plus = 0;
1349 	} else {
1350 		goto drop_and_release;
1351 	}
1352 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1353 
1354 	if (want_cookie && !tmp_opt.saw_tstamp)
1355 		tcp_clear_options(&tmp_opt);
1356 
1357 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1358 	tcp_openreq_init(req, &tmp_opt, skb);
1359 
1360 	ireq = inet_rsk(req);
1361 	ireq->loc_addr = daddr;
1362 	ireq->rmt_addr = saddr;
1363 	ireq->no_srccheck = inet_sk(sk)->transparent;
1364 	ireq->opt = tcp_v4_save_options(sk, skb);
1365 
1366 	if (security_inet_conn_request(sk, skb, req))
1367 		goto drop_and_free;
1368 
1369 	if (!want_cookie || tmp_opt.tstamp_ok)
1370 		TCP_ECN_create_request(req, skb);
1371 
1372 	if (want_cookie) {
1373 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1374 		req->cookie_ts = tmp_opt.tstamp_ok;
1375 	} else if (!isn) {
1376 		struct inet_peer *peer = NULL;
1377 		struct flowi4 fl4;
1378 
1379 		/* VJ's idea. We save the last timestamp seen
1380 		 * from the destination in the peer table when entering
1381 		 * the TIME-WAIT state, and check against it before
1382 		 * accepting a new connection request.
1383 		 *
1384 		 * If "isn" is not zero, this request hit an alive
1385 		 * timewait bucket, so all the necessary checks
1386 		 * were made by the code processing the timewait state.
1387 		 */
1388 		if (tmp_opt.saw_tstamp &&
1389 		    tcp_death_row.sysctl_tw_recycle &&
1390 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1391 		    fl4.daddr == saddr &&
1392 		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1393 			inet_peer_refcheck(peer);
1394 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1395 			    (s32)(peer->tcp_ts - req->ts_recent) >
1396 							TCP_PAWS_WINDOW) {
1397 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1398 				goto drop_and_release;
1399 			}
1400 		}
1401 		/* Kill the following clause if you dislike this approach. */
1402 		else if (!sysctl_tcp_syncookies &&
1403 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1404 			  (sysctl_max_syn_backlog >> 2)) &&
1405 			 (!peer || !peer->tcp_ts_stamp) &&
1406 			 (!dst || !dst_metric(dst, RTAX_RTT))) {
1407 			/* Without syncookies the last quarter of the
1408 			 * backlog is filled only with destinations
1409 			 * proven to be alive.
1410 			 * It means that we continue to communicate
1411 			 * with destinations already remembered
1412 			 * at the moment of the synflood.
1413 			 */
1414 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1415 				       &saddr, ntohs(tcp_hdr(skb)->source));
1416 			goto drop_and_release;
1417 		}
1418 
1419 		isn = tcp_v4_init_sequence(skb);
1420 	}
1421 	tcp_rsk(req)->snt_isn = isn;
1422 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
1423 
1424 	if (tcp_v4_send_synack(sk, dst, req,
1425 			       (struct request_values *)&tmp_ext) ||
1426 	    want_cookie)
1427 		goto drop_and_free;
1428 
1429 	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1430 	return 0;
1431 
1432 drop_and_release:
1433 	dst_release(dst);
1434 drop_and_free:
1435 	reqsk_free(req);
1436 drop:
1437 	return 0;
1438 }
1439 EXPORT_SYMBOL(tcp_v4_conn_request);
1440 
1441 
1442 /*
1443  * The three way handshake has completed - we got a valid synack -
1444  * now create the new socket.
1445  */
1446 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1447 				  struct request_sock *req,
1448 				  struct dst_entry *dst)
1449 {
1450 	struct inet_request_sock *ireq;
1451 	struct inet_sock *newinet;
1452 	struct tcp_sock *newtp;
1453 	struct sock *newsk;
1454 #ifdef CONFIG_TCP_MD5SIG
1455 	struct tcp_md5sig_key *key;
1456 #endif
1457 	struct ip_options_rcu *inet_opt;
1458 
1459 	if (sk_acceptq_is_full(sk))
1460 		goto exit_overflow;
1461 
1462 	newsk = tcp_create_openreq_child(sk, req, skb);
1463 	if (!newsk)
1464 		goto exit_nonewsk;
1465 
1466 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1467 
1468 	newtp		      = tcp_sk(newsk);
1469 	newinet		      = inet_sk(newsk);
1470 	ireq		      = inet_rsk(req);
1471 	newinet->inet_daddr   = ireq->rmt_addr;
1472 	newinet->inet_rcv_saddr = ireq->loc_addr;
1473 	newinet->inet_saddr	      = ireq->loc_addr;
1474 	inet_opt	      = ireq->opt;
1475 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
1476 	ireq->opt	      = NULL;
1477 	newinet->mc_index     = inet_iif(skb);
1478 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1479 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1480 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1481 	if (inet_opt)
1482 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1483 	newinet->inet_id = newtp->write_seq ^ jiffies;
1484 
1485 	if (!dst) {
1486 		dst = inet_csk_route_child_sock(sk, newsk, req);
1487 		if (!dst)
1488 			goto put_and_exit;
1489 	} else {
1490 		/* syncookie case : see end of cookie_v4_check() */
1491 	}
1492 	sk_setup_caps(newsk, dst);
1493 
1494 	tcp_mtup_init(newsk);
1495 	tcp_sync_mss(newsk, dst_mtu(dst));
1496 	newtp->advmss = dst_metric_advmss(dst);
1497 	if (tcp_sk(sk)->rx_opt.user_mss &&
1498 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1499 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1500 
1501 	tcp_initialize_rcv_mss(newsk);
1502 	if (tcp_rsk(req)->snt_synack)
1503 		tcp_valid_rtt_meas(newsk,
1504 		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
1505 	newtp->total_retrans = req->retrans;
1506 
1507 #ifdef CONFIG_TCP_MD5SIG
1508 	/* Copy over the MD5 key from the original socket */
1509 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1510 				AF_INET);
1511 	if (key != NULL) {
1512 		/*
1513 		 * We're using one, so create a matching key
1514 		 * on the newsk structure. If we fail to get
1515 		 * memory, then we end up not copying the key
1516 		 * across. Shucks.
1517 		 */
1518 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1519 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1520 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1521 	}
1522 #endif
1523 
1524 	if (__inet_inherit_port(sk, newsk) < 0)
1525 		goto put_and_exit;
1526 	__inet_hash_nolisten(newsk, NULL);
1527 
1528 	return newsk;
1529 
1530 exit_overflow:
1531 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1532 exit_nonewsk:
1533 	dst_release(dst);
1534 exit:
1535 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1536 	return NULL;
1537 put_and_exit:
1538 	tcp_clear_xmit_timers(newsk);
1539 	tcp_cleanup_congestion_control(newsk);
1540 	bh_unlock_sock(newsk);
1541 	sock_put(newsk);
1542 	goto exit;
1543 }
1544 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1545 
1546 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1547 {
1548 	struct tcphdr *th = tcp_hdr(skb);
1549 	const struct iphdr *iph = ip_hdr(skb);
1550 	struct sock *nsk;
1551 	struct request_sock **prev;
1552 	/* Find possible connection requests. */
1553 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1554 						       iph->saddr, iph->daddr);
1555 	if (req)
1556 		return tcp_check_req(sk, skb, req, prev);
1557 
1558 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1559 			th->source, iph->daddr, th->dest, inet_iif(skb));
1560 
1561 	if (nsk) {
1562 		if (nsk->sk_state != TCP_TIME_WAIT) {
1563 			bh_lock_sock(nsk);
1564 			return nsk;
1565 		}
1566 		inet_twsk_put(inet_twsk(nsk));
1567 		return NULL;
1568 	}
1569 
1570 #ifdef CONFIG_SYN_COOKIES
1571 	if (!th->syn)
1572 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1573 #endif
1574 	return sk;
1575 }
1576 
1577 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1578 {
1579 	const struct iphdr *iph = ip_hdr(skb);
1580 
1581 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1582 		if (!tcp_v4_check(skb->len, iph->saddr,
1583 				  iph->daddr, skb->csum)) {
1584 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1585 			return 0;
1586 		}
1587 	}
1588 
1589 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1590 				       skb->len, IPPROTO_TCP, 0);
1591 
1592 	if (skb->len <= 76) {
1593 		return __skb_checksum_complete(skb);
1594 	}
1595 	return 0;
1596 }
1597 
1598 
1599 /* The socket must have its spinlock held when we get
1600  * here.
1601  *
1602  * We have a potential double-lock case here, so even when
1603  * doing backlog processing we use the BH locking scheme.
1604  * This is because we cannot sleep with the original spinlock
1605  * held.
1606  */
1607 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1608 {
1609 	struct sock *rsk;
1610 #ifdef CONFIG_TCP_MD5SIG
1611 	/*
1612 	 * We really want to reject the packet as early as possible
1613 	 * if:
1614 	 *  o We're expecting an MD5'd packet and there is no MD5 tcp option
1615 	 *  o There is an MD5 option and we're not expecting one
1616 	 */
1617 	if (tcp_v4_inbound_md5_hash(sk, skb))
1618 		goto discard;
1619 #endif
1620 
1621 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1622 		sock_rps_save_rxhash(sk, skb);
1623 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1624 			rsk = sk;
1625 			goto reset;
1626 		}
1627 		return 0;
1628 	}
1629 
1630 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1631 		goto csum_err;
1632 
1633 	if (sk->sk_state == TCP_LISTEN) {
1634 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1635 		if (!nsk)
1636 			goto discard;
1637 
1638 		if (nsk != sk) {
1639 			sock_rps_save_rxhash(nsk, skb);
1640 			if (tcp_child_process(sk, nsk, skb)) {
1641 				rsk = nsk;
1642 				goto reset;
1643 			}
1644 			return 0;
1645 		}
1646 	} else
1647 		sock_rps_save_rxhash(sk, skb);
1648 
1649 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1650 		rsk = sk;
1651 		goto reset;
1652 	}
1653 	return 0;
1654 
1655 reset:
1656 	tcp_v4_send_reset(rsk, skb);
1657 discard:
1658 	kfree_skb(skb);
1659 	/* Be careful here. If this function gets more complicated and
1660 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1661 	 * might be destroyed here. This current version compiles correctly,
1662 	 * but you have been warned.
1663 	 */
1664 	return 0;
1665 
1666 csum_err:
1667 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1668 	goto discard;
1669 }
1670 EXPORT_SYMBOL(tcp_v4_do_rcv);
1671 
1672 /*
1673  *	From tcp_input.c
1674  */
1675 
1676 int tcp_v4_rcv(struct sk_buff *skb)
1677 {
1678 	const struct iphdr *iph;
1679 	const struct tcphdr *th;
1680 	struct sock *sk;
1681 	int ret;
1682 	struct net *net = dev_net(skb->dev);
1683 
1684 	if (skb->pkt_type != PACKET_HOST)
1685 		goto discard_it;
1686 
1687 	/* Count it even if it's bad */
1688 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1689 
1690 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1691 		goto discard_it;
1692 
1693 	th = tcp_hdr(skb);
1694 
1695 	if (th->doff < sizeof(struct tcphdr) / 4)
1696 		goto bad_packet;
1697 	if (!pskb_may_pull(skb, th->doff * 4))
1698 		goto discard_it;
1699 
1700 	/* An explanation is required here, I think.
1701 	 * Packet length and doff are validated by header prediction,
1702 	 * provided the case of th->doff == 0 is eliminated.
1703 	 * So, we defer the checks. */
1704 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1705 		goto bad_packet;
1706 
1707 	th = tcp_hdr(skb);
1708 	iph = ip_hdr(skb);
1709 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1710 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1711 				    skb->len - th->doff * 4);
1712 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1713 	TCP_SKB_CB(skb)->when	 = 0;
1714 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1715 	TCP_SKB_CB(skb)->sacked	 = 0;
1716 
1717 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1718 	if (!sk)
1719 		goto no_tcp_socket;
1720 
1721 process:
1722 	if (sk->sk_state == TCP_TIME_WAIT)
1723 		goto do_time_wait;
1724 
1725 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1726 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1727 		goto discard_and_relse;
1728 	}
1729 
1730 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1731 		goto discard_and_relse;
1732 	nf_reset(skb);
1733 
1734 	if (sk_filter(sk, skb))
1735 		goto discard_and_relse;
1736 
1737 	skb->dev = NULL;
1738 
1739 	bh_lock_sock_nested(sk);
1740 	ret = 0;
1741 	if (!sock_owned_by_user(sk)) {
1742 #ifdef CONFIG_NET_DMA
1743 		struct tcp_sock *tp = tcp_sk(sk);
1744 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1745 			tp->ucopy.dma_chan = net_dma_find_channel();
1746 		if (tp->ucopy.dma_chan)
1747 			ret = tcp_v4_do_rcv(sk, skb);
1748 		else
1749 #endif
1750 		{
1751 			if (!tcp_prequeue(sk, skb))
1752 				ret = tcp_v4_do_rcv(sk, skb);
1753 		}
1754 	} else if (unlikely(sk_add_backlog(sk, skb,
1755 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
1756 		bh_unlock_sock(sk);
1757 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1758 		goto discard_and_relse;
1759 	}
1760 	bh_unlock_sock(sk);
1761 
1762 	sock_put(sk);
1763 
1764 	return ret;
1765 
1766 no_tcp_socket:
1767 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1768 		goto discard_it;
1769 
1770 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1771 bad_packet:
1772 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1773 	} else {
1774 		tcp_v4_send_reset(NULL, skb);
1775 	}
1776 
1777 discard_it:
1778 	/* Discard frame. */
1779 	kfree_skb(skb);
1780 	return 0;
1781 
1782 discard_and_relse:
1783 	sock_put(sk);
1784 	goto discard_it;
1785 
1786 do_time_wait:
1787 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1788 		inet_twsk_put(inet_twsk(sk));
1789 		goto discard_it;
1790 	}
1791 
1792 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1793 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1794 		inet_twsk_put(inet_twsk(sk));
1795 		goto discard_it;
1796 	}
1797 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1798 	case TCP_TW_SYN: {
1799 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1800 							&tcp_hashinfo,
1801 							iph->daddr, th->dest,
1802 							inet_iif(skb));
1803 		if (sk2) {
1804 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1805 			inet_twsk_put(inet_twsk(sk));
1806 			sk = sk2;
1807 			goto process;
1808 		}
1809 		/* Fall through to ACK */
1810 	}
1811 	case TCP_TW_ACK:
1812 		tcp_v4_timewait_ack(sk, skb);
1813 		break;
1814 	case TCP_TW_RST:
1815 		goto no_tcp_socket;
1816 	case TCP_TW_SUCCESS:;
1817 	}
1818 	goto discard_it;
1819 }
1820 
1821 struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1822 {
1823 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1824 	struct inet_sock *inet = inet_sk(sk);
1825 	struct inet_peer *peer;
1826 
1827 	if (!rt ||
1828 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1829 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
1830 		*release_it = true;
1831 	} else {
1832 		if (!rt->peer)
1833 			rt_bind_peer(rt, inet->inet_daddr, 1);
1834 		peer = rt->peer;
1835 		*release_it = false;
1836 	}
1837 
1838 	return peer;
1839 }
1840 EXPORT_SYMBOL(tcp_v4_get_peer);
1841 
1842 void *tcp_v4_tw_get_peer(struct sock *sk)
1843 {
1844 	const struct inet_timewait_sock *tw = inet_twsk(sk);
1845 
1846 	return inet_getpeer_v4(tw->tw_daddr, 1);
1847 }
1848 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1849 
1850 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1851 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1852 	.twsk_unique	= tcp_twsk_unique,
1853 	.twsk_destructor = tcp_twsk_destructor,
1854 	.twsk_getpeer	= tcp_v4_tw_get_peer,
1855 };
1856 
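/*
 * Address-family specific hooks that the generic connection-socket code
 * calls through icsk->icsk_af_ops: transmit and checksum helpers, SYN
 * handling, and socket-option plumbing for plain IPv4 TCP sockets.
 */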
1857 const struct inet_connection_sock_af_ops ipv4_specific = {
1858 	.queue_xmit	   = ip_queue_xmit,
1859 	.send_check	   = tcp_v4_send_check,
1860 	.rebuild_header	   = inet_sk_rebuild_header,
1861 	.conn_request	   = tcp_v4_conn_request,
1862 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1863 	.get_peer	   = tcp_v4_get_peer,
1864 	.net_header_len	   = sizeof(struct iphdr),
1865 	.setsockopt	   = ip_setsockopt,
1866 	.getsockopt	   = ip_getsockopt,
1867 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1868 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1869 	.bind_conflict	   = inet_csk_bind_conflict,
1870 #ifdef CONFIG_COMPAT
1871 	.compat_setsockopt = compat_ip_setsockopt,
1872 	.compat_getsockopt = compat_ip_getsockopt,
1873 #endif
1874 };
1875 EXPORT_SYMBOL(ipv4_specific);
1876 
1877 #ifdef CONFIG_TCP_MD5SIG
1878 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1879 	.md5_lookup		= tcp_v4_md5_lookup,
1880 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1881 	.md5_parse		= tcp_v4_parse_md5_keys,
1882 };
1883 #endif
1884 
1885 /* NOTE: A lot of things are set to zero explicitly by the call to
1886  *       sk_alloc(), so they need not be done here.
1887  */
1888 static int tcp_v4_init_sock(struct sock *sk)
1889 {
1890 	struct inet_connection_sock *icsk = inet_csk(sk);
1891 
1892 	tcp_init_sock(sk);
1893 
1894 	icsk->icsk_af_ops = &ipv4_specific;
1895 
1896 #ifdef CONFIG_TCP_MD5SIG
1897 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1898 #endif
1899 
1900 	return 0;
1901 }
1902 
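/*
 * Final per-socket teardown, invoked through the protocol's ->destroy hook:
 * stop timers, purge every queue that may still hold skbs, drop MD5 state,
 * and release the bind bucket and any cached sendmsg page.
 */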
1903 void tcp_v4_destroy_sock(struct sock *sk)
1904 {
1905 	struct tcp_sock *tp = tcp_sk(sk);
1906 
1907 	tcp_clear_xmit_timers(sk);
1908 
1909 	tcp_cleanup_congestion_control(sk);
1910 
1911 	/* Clean up the write buffer. */
1912 	tcp_write_queue_purge(sk);
1913 
1914 	/* Cleans up our, hopefully empty, out_of_order_queue. */
1915 	__skb_queue_purge(&tp->out_of_order_queue);
1916 
1917 #ifdef CONFIG_TCP_MD5SIG
1918 	/* Clean up the MD5 key list, if any */
1919 	if (tp->md5sig_info) {
1920 		tcp_clear_md5_list(sk);
1921 		kfree_rcu(tp->md5sig_info, rcu);
1922 		tp->md5sig_info = NULL;
1923 	}
1924 #endif
1925 
1926 #ifdef CONFIG_NET_DMA
1927 	/* Cleans up our sk_async_wait_queue */
1928 	__skb_queue_purge(&sk->sk_async_wait_queue);
1929 #endif
1930 
1931 	/* Clean up the prequeue; it should already be empty. */
1932 	__skb_queue_purge(&tp->ucopy.prequeue);
1933 
1934 	/* Clean up a referenced TCP bind bucket. */
1935 	if (inet_csk(sk)->icsk_bind_hash)
1936 		inet_put_port(sk);
1937 
1938 	/*
1939 	 * If a cached sendmsg page exists, toss it.
1940 	 */
1941 	if (sk->sk_sndmsg_page) {
1942 		__free_page(sk->sk_sndmsg_page);
1943 		sk->sk_sndmsg_page = NULL;
1944 	}
1945 
1946 	/* TCP Cookie Transactions */
1947 	if (tp->cookie_values != NULL) {
1948 		kref_put(&tp->cookie_values->kref,
1949 			 tcp_cookie_values_release);
1950 		tp->cookie_values = NULL;
1951 	}
1952 
1953 	sk_sockets_allocated_dec(sk);
1954 	sock_release_memcg(sk);
1955 }
1956 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1957 
1958 #ifdef CONFIG_PROC_FS
1959 /* Proc filesystem TCP sock list dumping. */
1960 
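/*
 * The iterator below walks the listening hash first (descending into each
 * listener's SYN queue for open requests), then the established hash, whose
 * buckets also carry the TIME_WAIT chain.  st->bucket, st->offset and
 * st->num record the position so a later read() can resume cheaply.
 */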
1961 static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1962 {
1963 	return hlist_nulls_empty(head) ? NULL :
1964 		list_entry(head->first, struct inet_timewait_sock, tw_node);
1965 }
1966 
1967 static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1968 {
1969 	return !is_a_nulls(tw->tw_node.next) ?
1970 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1971 }
1972 
1973 /*
1974  * Get the next listener socket following cur.  If cur is NULL, get the first socket
1975  * starting from bucket given in st->bucket; when st->bucket is zero the
1976  * very first socket in the hash table is returned.
1977  */
1978 static void *listening_get_next(struct seq_file *seq, void *cur)
1979 {
1980 	struct inet_connection_sock *icsk;
1981 	struct hlist_nulls_node *node;
1982 	struct sock *sk = cur;
1983 	struct inet_listen_hashbucket *ilb;
1984 	struct tcp_iter_state *st = seq->private;
1985 	struct net *net = seq_file_net(seq);
1986 
1987 	if (!sk) {
1988 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
1989 		spin_lock_bh(&ilb->lock);
1990 		sk = sk_nulls_head(&ilb->head);
1991 		st->offset = 0;
1992 		goto get_sk;
1993 	}
1994 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
1995 	++st->num;
1996 	++st->offset;
1997 
1998 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
1999 		struct request_sock *req = cur;
2000 
2001 		icsk = inet_csk(st->syn_wait_sk);
2002 		req = req->dl_next;
2003 		while (1) {
2004 			while (req) {
2005 				if (req->rsk_ops->family == st->family) {
2006 					cur = req;
2007 					goto out;
2008 				}
2009 				req = req->dl_next;
2010 			}
2011 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2012 				break;
2013 get_req:
2014 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2015 		}
2016 		sk	  = sk_nulls_next(st->syn_wait_sk);
2017 		st->state = TCP_SEQ_STATE_LISTENING;
2018 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2019 	} else {
2020 		icsk = inet_csk(sk);
2021 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2022 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2023 			goto start_req;
2024 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2025 		sk = sk_nulls_next(sk);
2026 	}
2027 get_sk:
2028 	sk_nulls_for_each_from(sk, node) {
2029 		if (!net_eq(sock_net(sk), net))
2030 			continue;
2031 		if (sk->sk_family == st->family) {
2032 			cur = sk;
2033 			goto out;
2034 		}
2035 		icsk = inet_csk(sk);
2036 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2037 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2038 start_req:
2039 			st->uid		= sock_i_uid(sk);
2040 			st->syn_wait_sk = sk;
2041 			st->state	= TCP_SEQ_STATE_OPENREQ;
2042 			st->sbucket	= 0;
2043 			goto get_req;
2044 		}
2045 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046 	}
2047 	spin_unlock_bh(&ilb->lock);
2048 	st->offset = 0;
2049 	if (++st->bucket < INET_LHTABLE_SIZE) {
2050 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2051 		spin_lock_bh(&ilb->lock);
2052 		sk = sk_nulls_head(&ilb->head);
2053 		goto get_sk;
2054 	}
2055 	cur = NULL;
2056 out:
2057 	return cur;
2058 }
2059 
2060 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2061 {
2062 	struct tcp_iter_state *st = seq->private;
2063 	void *rc;
2064 
2065 	st->bucket = 0;
2066 	st->offset = 0;
2067 	rc = listening_get_next(seq, NULL);
2068 
2069 	while (rc && *pos) {
2070 		rc = listening_get_next(seq, rc);
2071 		--*pos;
2072 	}
2073 	return rc;
2074 }
2075 
2076 static inline bool empty_bucket(struct tcp_iter_state *st)
2077 {
2078 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2079 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2080 }
2081 
2082 /*
2083  * Get first established socket starting from bucket given in st->bucket.
2084  * If st->bucket is zero, the very first socket in the hash is returned.
2085  */
2086 static void *established_get_first(struct seq_file *seq)
2087 {
2088 	struct tcp_iter_state *st = seq->private;
2089 	struct net *net = seq_file_net(seq);
2090 	void *rc = NULL;
2091 
2092 	st->offset = 0;
2093 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2094 		struct sock *sk;
2095 		struct hlist_nulls_node *node;
2096 		struct inet_timewait_sock *tw;
2097 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2098 
2099 		/* Lockless fast path for the common case of empty buckets */
2100 		if (empty_bucket(st))
2101 			continue;
2102 
2103 		spin_lock_bh(lock);
2104 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2105 			if (sk->sk_family != st->family ||
2106 			    !net_eq(sock_net(sk), net)) {
2107 				continue;
2108 			}
2109 			rc = sk;
2110 			goto out;
2111 		}
2112 		st->state = TCP_SEQ_STATE_TIME_WAIT;
2113 		inet_twsk_for_each(tw, node,
2114 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2115 			if (tw->tw_family != st->family ||
2116 			    !net_eq(twsk_net(tw), net)) {
2117 				continue;
2118 			}
2119 			rc = tw;
2120 			goto out;
2121 		}
2122 		spin_unlock_bh(lock);
2123 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2124 	}
2125 out:
2126 	return rc;
2127 }
2128 
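/*
 * Advance within the current ehash bucket: first the established chain,
 * then its TIME_WAIT chain, then move on to the next non-empty bucket,
 * dropping the old bucket lock and taking the new one.
 */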
2129 static void *established_get_next(struct seq_file *seq, void *cur)
2130 {
2131 	struct sock *sk = cur;
2132 	struct inet_timewait_sock *tw;
2133 	struct hlist_nulls_node *node;
2134 	struct tcp_iter_state *st = seq->private;
2135 	struct net *net = seq_file_net(seq);
2136 
2137 	++st->num;
2138 	++st->offset;
2139 
2140 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2141 		tw = cur;
2142 		tw = tw_next(tw);
2143 get_tw:
2144 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2145 			tw = tw_next(tw);
2146 		}
2147 		if (tw) {
2148 			cur = tw;
2149 			goto out;
2150 		}
2151 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2152 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2153 
2154 		/* Look for the next non-empty bucket */
2155 		st->offset = 0;
2156 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2157 				empty_bucket(st))
2158 			;
2159 		if (st->bucket > tcp_hashinfo.ehash_mask)
2160 			return NULL;
2161 
2162 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2163 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2164 	} else
2165 		sk = sk_nulls_next(sk);
2166 
2167 	sk_nulls_for_each_from(sk, node) {
2168 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2169 			goto found;
2170 	}
2171 
2172 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2173 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2174 	goto get_tw;
2175 found:
2176 	cur = sk;
2177 out:
2178 	return cur;
2179 }
2180 
2181 static void *established_get_idx(struct seq_file *seq, loff_t pos)
2182 {
2183 	struct tcp_iter_state *st = seq->private;
2184 	void *rc;
2185 
2186 	st->bucket = 0;
2187 	rc = established_get_first(seq);
2188 
2189 	while (rc && pos) {
2190 		rc = established_get_next(seq, rc);
2191 		--pos;
2192 	}
2193 	return rc;
2194 }
2195 
2196 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2197 {
2198 	void *rc;
2199 	struct tcp_iter_state *st = seq->private;
2200 
2201 	st->state = TCP_SEQ_STATE_LISTENING;
2202 	rc	  = listening_get_idx(seq, &pos);
2203 
2204 	if (!rc) {
2205 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2206 		rc	  = established_get_idx(seq, pos);
2207 	}
2208 
2209 	return rc;
2210 }
2211 
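/*
 * Try to resume from the bucket/offset saved by the previous read instead
 * of rewinding to the start of the tables; st->num is restored afterwards
 * so the running entry counter is not skewed by the re-walk.
 */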
2212 static void *tcp_seek_last_pos(struct seq_file *seq)
2213 {
2214 	struct tcp_iter_state *st = seq->private;
2215 	int offset = st->offset;
2216 	int orig_num = st->num;
2217 	void *rc = NULL;
2218 
2219 	switch (st->state) {
2220 	case TCP_SEQ_STATE_OPENREQ:
2221 	case TCP_SEQ_STATE_LISTENING:
2222 		if (st->bucket >= INET_LHTABLE_SIZE)
2223 			break;
2224 		st->state = TCP_SEQ_STATE_LISTENING;
2225 		rc = listening_get_next(seq, NULL);
2226 		while (offset-- && rc)
2227 			rc = listening_get_next(seq, rc);
2228 		if (rc)
2229 			break;
2230 		st->bucket = 0;
2231 		/* Fallthrough */
2232 	case TCP_SEQ_STATE_ESTABLISHED:
2233 	case TCP_SEQ_STATE_TIME_WAIT:
2234 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2235 		if (st->bucket > tcp_hashinfo.ehash_mask)
2236 			break;
2237 		rc = established_get_first(seq);
2238 		while (offset-- && rc)
2239 			rc = established_get_next(seq, rc);
2240 	}
2241 
2242 	st->num = orig_num;
2243 
2244 	return rc;
2245 }
2246 
2247 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2248 {
2249 	struct tcp_iter_state *st = seq->private;
2250 	void *rc;
2251 
2252 	if (*pos && *pos == st->last_pos) {
2253 		rc = tcp_seek_last_pos(seq);
2254 		if (rc)
2255 			goto out;
2256 	}
2257 
2258 	st->state = TCP_SEQ_STATE_LISTENING;
2259 	st->num = 0;
2260 	st->bucket = 0;
2261 	st->offset = 0;
2262 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2263 
2264 out:
2265 	st->last_pos = *pos;
2266 	return rc;
2267 }
2268 
2269 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2270 {
2271 	struct tcp_iter_state *st = seq->private;
2272 	void *rc = NULL;
2273 
2274 	if (v == SEQ_START_TOKEN) {
2275 		rc = tcp_get_idx(seq, 0);
2276 		goto out;
2277 	}
2278 
2279 	switch (st->state) {
2280 	case TCP_SEQ_STATE_OPENREQ:
2281 	case TCP_SEQ_STATE_LISTENING:
2282 		rc = listening_get_next(seq, v);
2283 		if (!rc) {
2284 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2285 			st->bucket = 0;
2286 			st->offset = 0;
2287 			rc	  = established_get_first(seq);
2288 		}
2289 		break;
2290 	case TCP_SEQ_STATE_ESTABLISHED:
2291 	case TCP_SEQ_STATE_TIME_WAIT:
2292 		rc = established_get_next(seq, v);
2293 		break;
2294 	}
2295 out:
2296 	++*pos;
2297 	st->last_pos = *pos;
2298 	return rc;
2299 }
2300 
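/*
 * Drop whichever lock the iterator is still holding, which depends on the
 * state it stopped in: the listener's syn_wait_lock, the listening-hash
 * bucket lock, or the ehash bucket lock.
 */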
2301 static void tcp_seq_stop(struct seq_file *seq, void *v)
2302 {
2303 	struct tcp_iter_state *st = seq->private;
2304 
2305 	switch (st->state) {
2306 	case TCP_SEQ_STATE_OPENREQ:
2307 		if (v) {
2308 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2309 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2310 		}
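		/* Fall through */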
2311 	case TCP_SEQ_STATE_LISTENING:
2312 		if (v != SEQ_START_TOKEN)
2313 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2314 		break;
2315 	case TCP_SEQ_STATE_TIME_WAIT:
2316 	case TCP_SEQ_STATE_ESTABLISHED:
2317 		if (v)
2318 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2319 		break;
2320 	}
2321 }
2322 
2323 int tcp_seq_open(struct inode *inode, struct file *file)
2324 {
2325 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2326 	struct tcp_iter_state *s;
2327 	int err;
2328 
2329 	err = seq_open_net(inode, file, &afinfo->seq_ops,
2330 			  sizeof(struct tcp_iter_state));
2331 	if (err < 0)
2332 		return err;
2333 
2334 	s = ((struct seq_file *)file->private_data)->private;
2335 	s->family		= afinfo->family;
2336 	s->last_pos 		= 0;
2337 	return 0;
2338 }
2339 EXPORT_SYMBOL(tcp_seq_open);
2340 
2341 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2342 {
2343 	int rc = 0;
2344 	struct proc_dir_entry *p;
2345 
2346 	afinfo->seq_ops.start		= tcp_seq_start;
2347 	afinfo->seq_ops.next		= tcp_seq_next;
2348 	afinfo->seq_ops.stop		= tcp_seq_stop;
2349 
2350 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2351 			     afinfo->seq_fops, afinfo);
2352 	if (!p)
2353 		rc = -ENOMEM;
2354 	return rc;
2355 }
2356 EXPORT_SYMBOL(tcp_proc_register);
2357 
2358 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2359 {
2360 	proc_net_remove(net, afinfo->name);
2361 }
2362 EXPORT_SYMBOL(tcp_proc_unregister);
2363 
2364 static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2365 			 struct seq_file *f, int i, int uid, int *len)
2366 {
2367 	const struct inet_request_sock *ireq = inet_rsk(req);
2368 	int ttd = req->expires - jiffies;
2369 
2370 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2371 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2372 		i,
2373 		ireq->loc_addr,
2374 		ntohs(inet_sk(sk)->inet_sport),
2375 		ireq->rmt_addr,
2376 		ntohs(ireq->rmt_port),
2377 		TCP_SYN_RECV,
2378 		0, 0, /* could print option size, but that is af dependent. */
2379 		1,    /* timers active (only the expire timer) */
2380 		jiffies_to_clock_t(ttd),
2381 		req->retrans,
2382 		uid,
2383 		0,  /* non-standard timer */
2384 		0, /* open_requests have no inode */
2385 		atomic_read(&sk->sk_refcnt),
2386 		req,
2387 		len);
2388 }
2389 
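/*
 * timer_active encodes which timer is pending, for the "tr" column:
 * 1 retransmit, 2 keepalive or other sk_timer, 3 TIME_WAIT (used by
 * get_timewait4_sock()), 4 zero-window probe, 0 none.
 */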
2390 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2391 {
2392 	int timer_active;
2393 	unsigned long timer_expires;
2394 	const struct tcp_sock *tp = tcp_sk(sk);
2395 	const struct inet_connection_sock *icsk = inet_csk(sk);
2396 	const struct inet_sock *inet = inet_sk(sk);
2397 	__be32 dest = inet->inet_daddr;
2398 	__be32 src = inet->inet_rcv_saddr;
2399 	__u16 destp = ntohs(inet->inet_dport);
2400 	__u16 srcp = ntohs(inet->inet_sport);
2401 	int rx_queue;
2402 
2403 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2404 		timer_active	= 1;
2405 		timer_expires	= icsk->icsk_timeout;
2406 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2407 		timer_active	= 4;
2408 		timer_expires	= icsk->icsk_timeout;
2409 	} else if (timer_pending(&sk->sk_timer)) {
2410 		timer_active	= 2;
2411 		timer_expires	= sk->sk_timer.expires;
2412 	} else {
2413 		timer_active	= 0;
2414 		timer_expires = jiffies;
2415 	}
2416 
2417 	if (sk->sk_state == TCP_LISTEN)
2418 		rx_queue = sk->sk_ack_backlog;
2419 	else
2420 		/*
2421 		 * Because we don't lock the socket, we might find a transient negative value.
2422 		 */
2423 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2424 
2425 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2426 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2427 		i, src, srcp, dest, destp, sk->sk_state,
2428 		tp->write_seq - tp->snd_una,
2429 		rx_queue,
2430 		timer_active,
2431 		jiffies_to_clock_t(timer_expires - jiffies),
2432 		icsk->icsk_retransmits,
2433 		sock_i_uid(sk),
2434 		icsk->icsk_probes_out,
2435 		sock_i_ino(sk),
2436 		atomic_read(&sk->sk_refcnt), sk,
2437 		jiffies_to_clock_t(icsk->icsk_rto),
2438 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2439 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2440 		tp->snd_cwnd,
2441 		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2442 		len);
2443 }
2444 
2445 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2446 			       struct seq_file *f, int i, int *len)
2447 {
2448 	__be32 dest, src;
2449 	__u16 destp, srcp;
2450 	int ttd = tw->tw_ttd - jiffies;
2451 
2452 	if (ttd < 0)
2453 		ttd = 0;
2454 
2455 	dest  = tw->tw_daddr;
2456 	src   = tw->tw_rcv_saddr;
2457 	destp = ntohs(tw->tw_dport);
2458 	srcp  = ntohs(tw->tw_sport);
2459 
2460 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2461 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2462 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2463 		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2464 		atomic_read(&tw->tw_refcnt), tw, len);
2465 }
2466 
2467 #define TMPSZ 150
2468 
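/*
 * Illustrative /proc/net/tcp entry (values are made up; addresses and ports
 * are hex, and the address words appear byte-swapped on little-endian
 * machines, so 0100007F:0016 is 127.0.0.1:22):
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 ffff88003abcd000 100 0 0 10 -1
 */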
2469 static int tcp4_seq_show(struct seq_file *seq, void *v)
2470 {
2471 	struct tcp_iter_state *st;
2472 	int len;
2473 
2474 	if (v == SEQ_START_TOKEN) {
2475 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2476 			   "  sl  local_address rem_address   st tx_queue "
2477 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2478 			   "inode");
2479 		goto out;
2480 	}
2481 	st = seq->private;
2482 
2483 	switch (st->state) {
2484 	case TCP_SEQ_STATE_LISTENING:
2485 	case TCP_SEQ_STATE_ESTABLISHED:
2486 		get_tcp4_sock(v, seq, st->num, &len);
2487 		break;
2488 	case TCP_SEQ_STATE_OPENREQ:
2489 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2490 		break;
2491 	case TCP_SEQ_STATE_TIME_WAIT:
2492 		get_timewait4_sock(v, seq, st->num, &len);
2493 		break;
2494 	}
2495 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2496 out:
2497 	return 0;
2498 }
2499 
2500 static const struct file_operations tcp_afinfo_seq_fops = {
2501 	.owner   = THIS_MODULE,
2502 	.open    = tcp_seq_open,
2503 	.read    = seq_read,
2504 	.llseek  = seq_lseek,
2505 	.release = seq_release_net
2506 };
2507 
2508 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2509 	.name		= "tcp",
2510 	.family		= AF_INET,
2511 	.seq_fops	= &tcp_afinfo_seq_fops,
2512 	.seq_ops	= {
2513 		.show		= tcp4_seq_show,
2514 	},
2515 };
2516 
2517 static int __net_init tcp4_proc_init_net(struct net *net)
2518 {
2519 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2520 }
2521 
2522 static void __net_exit tcp4_proc_exit_net(struct net *net)
2523 {
2524 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2525 }
2526 
2527 static struct pernet_operations tcp4_net_ops = {
2528 	.init = tcp4_proc_init_net,
2529 	.exit = tcp4_proc_exit_net,
2530 };
2531 
2532 int __init tcp4_proc_init(void)
2533 {
2534 	return register_pernet_subsys(&tcp4_net_ops);
2535 }
2536 
2537 void tcp4_proc_exit(void)
2538 {
2539 	unregister_pernet_subsys(&tcp4_net_ops);
2540 }
2541 #endif /* CONFIG_PROC_FS */
2542 
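/*
 * GRO entry points: receive verifies the TCP checksum against the
 * pseudo-header when the device supplied CHECKSUM_COMPLETE and otherwise
 * flags the flow for a flush; complete recomputes the pseudo-header
 * checksum and marks the merged skb as SKB_GSO_TCPV4 so it can be
 * resegmented later.
 */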
2543 struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2544 {
2545 	const struct iphdr *iph = skb_gro_network_header(skb);
2546 
2547 	switch (skb->ip_summed) {
2548 	case CHECKSUM_COMPLETE:
2549 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2550 				  skb->csum)) {
2551 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2552 			break;
2553 		}
2554 
2555 		/* fall through */
2556 	case CHECKSUM_NONE:
2557 		NAPI_GRO_CB(skb)->flush = 1;
2558 		return NULL;
2559 	}
2560 
2561 	return tcp_gro_receive(head, skb);
2562 }
2563 
2564 int tcp4_gro_complete(struct sk_buff *skb)
2565 {
2566 	const struct iphdr *iph = ip_hdr(skb);
2567 	struct tcphdr *th = tcp_hdr(skb);
2568 
2569 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2570 				  iph->saddr, iph->daddr, 0);
2571 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2572 
2573 	return tcp_gro_complete(skb);
2574 }
2575 
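/*
 * The proto table that registers TCP with the generic AF_INET socket layer.
 * no_autobind is set because TCP picks a local port at connect()/listen()
 * time rather than on the first send.
 */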
2576 struct proto tcp_prot = {
2577 	.name			= "TCP",
2578 	.owner			= THIS_MODULE,
2579 	.close			= tcp_close,
2580 	.connect		= tcp_v4_connect,
2581 	.disconnect		= tcp_disconnect,
2582 	.accept			= inet_csk_accept,
2583 	.ioctl			= tcp_ioctl,
2584 	.init			= tcp_v4_init_sock,
2585 	.destroy		= tcp_v4_destroy_sock,
2586 	.shutdown		= tcp_shutdown,
2587 	.setsockopt		= tcp_setsockopt,
2588 	.getsockopt		= tcp_getsockopt,
2589 	.recvmsg		= tcp_recvmsg,
2590 	.sendmsg		= tcp_sendmsg,
2591 	.sendpage		= tcp_sendpage,
2592 	.backlog_rcv		= tcp_v4_do_rcv,
2593 	.hash			= inet_hash,
2594 	.unhash			= inet_unhash,
2595 	.get_port		= inet_csk_get_port,
2596 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2597 	.sockets_allocated	= &tcp_sockets_allocated,
2598 	.orphan_count		= &tcp_orphan_count,
2599 	.memory_allocated	= &tcp_memory_allocated,
2600 	.memory_pressure	= &tcp_memory_pressure,
2601 	.sysctl_wmem		= sysctl_tcp_wmem,
2602 	.sysctl_rmem		= sysctl_tcp_rmem,
2603 	.max_header		= MAX_TCP_HEADER,
2604 	.obj_size		= sizeof(struct tcp_sock),
2605 	.slab_flags		= SLAB_DESTROY_BY_RCU,
2606 	.twsk_prot		= &tcp_timewait_sock_ops,
2607 	.rsk_prot		= &tcp_request_sock_ops,
2608 	.h.hashinfo		= &tcp_hashinfo,
2609 	.no_autobind		= true,
2610 #ifdef CONFIG_COMPAT
2611 	.compat_setsockopt	= compat_tcp_setsockopt,
2612 	.compat_getsockopt	= compat_tcp_getsockopt,
2613 #endif
2614 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2615 	.init_cgroup		= tcp_init_cgroup,
2616 	.destroy_cgroup		= tcp_destroy_cgroup,
2617 	.proto_cgroup		= tcp_proto_cgroup,
2618 #endif
2619 };
2620 EXPORT_SYMBOL(tcp_prot);
2621 
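/*
 * Per-namespace setup: net->ipv4.tcp_sock is a kernel control socket used
 * to transmit RSTs and ACKs that are not associated with any local socket
 * (see tcp_v4_send_reset()).
 */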
2622 static int __net_init tcp_sk_init(struct net *net)
2623 {
2624 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2625 				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2626 }
2627 
2628 static void __net_exit tcp_sk_exit(struct net *net)
2629 {
2630 	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2631 }
2632 
2633 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2634 {
2635 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2636 }
2637 
2638 static struct pernet_operations __net_initdata tcp_sk_ops = {
2639 	.init	   = tcp_sk_init,
2640 	.exit	   = tcp_sk_exit,
2641 	.exit_batch = tcp_sk_exit_batch,
2642 };
2643 
2644 void __init tcp_v4_init(void)
2645 {
2646 	inet_hashinfo_init(&tcp_hashinfo);
2647 	if (register_pernet_subsys(&tcp_sk_ops))
2648 		panic("Failed to create the TCP control socket.\n");
2649 }
2650