xref: /linux/net/ipv4/tcp_ipv4.c (revision e0683e707c12a431919e1be814e15a4360523533)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
541da177e4SLinus Torvalds 
55eb4dea58SHerbert Xu #include <linux/bottom_half.h>
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
645a0e3ad6STejun Heo #include <linux/slab.h>
651da177e4SLinus Torvalds 
66457c4cbcSEric W. Biederman #include <net/net_namespace.h>
671da177e4SLinus Torvalds #include <net/icmp.h>
68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
691da177e4SLinus Torvalds #include <net/tcp.h>
7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
711da177e4SLinus Torvalds #include <net/ipv6.h>
721da177e4SLinus Torvalds #include <net/inet_common.h>
736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
741da177e4SLinus Torvalds #include <net/xfrm.h>
751a2449a8SChris Leech #include <net/netdma.h>
766e5714eaSDavid S. Miller #include <net/secure_seq.h>
77d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h>
781da177e4SLinus Torvalds 
791da177e4SLinus Torvalds #include <linux/inet.h>
801da177e4SLinus Torvalds #include <linux/ipv6.h>
811da177e4SLinus Torvalds #include <linux/stddef.h>
821da177e4SLinus Torvalds #include <linux/proc_fs.h>
831da177e4SLinus Torvalds #include <linux/seq_file.h>
841da177e4SLinus Torvalds 
85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
87cfb6eeb4SYOSHIFUJI Hideaki 
88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds 
93cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
94a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
96cfb6eeb4SYOSHIFUJI Hideaki #endif
97cfb6eeb4SYOSHIFUJI Hideaki 
985caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
994bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
1001da177e4SLinus Torvalds 
101cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1021da177e4SLinus Torvalds {
103eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
105aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
106aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1071da177e4SLinus Torvalds }
1081da177e4SLinus Torvalds 
1096d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1106d6ee43eSArnaldo Carvalho de Melo {
1116d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1126d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1136d6ee43eSArnaldo Carvalho de Melo 
1146d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1156d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1166d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1176d6ee43eSArnaldo Carvalho de Melo 
1186d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1196d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1206d6ee43eSArnaldo Carvalho de Melo 	   holder.
1216d6ee43eSArnaldo Carvalho de Melo 
1226d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1236d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1246d6ee43eSArnaldo Carvalho de Melo 	 */
1256d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
1266d6ee43eSArnaldo Carvalho de Melo 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
1279d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1286d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1296d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1306d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1326d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1336d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1346d6ee43eSArnaldo Carvalho de Melo 		return 1;
1356d6ee43eSArnaldo Carvalho de Melo 	}
1366d6ee43eSArnaldo Carvalho de Melo 
1376d6ee43eSArnaldo Carvalho de Melo 	return 0;
1386d6ee43eSArnaldo Carvalho de Melo }
1396d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1406d6ee43eSArnaldo Carvalho de Melo 
141ee995283SPavel Emelyanov static int tcp_repair_connect(struct sock *sk)
142ee995283SPavel Emelyanov {
143ee995283SPavel Emelyanov 	tcp_connect_init(sk);
144ee995283SPavel Emelyanov 	tcp_finish_connect(sk, NULL);
145ee995283SPavel Emelyanov 
146ee995283SPavel Emelyanov 	return 0;
147ee995283SPavel Emelyanov }
148ee995283SPavel Emelyanov 
1491da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
1501da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1511da177e4SLinus Torvalds {
1522d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1531da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1541da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
155dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
156bada8adcSAl Viro 	__be32 daddr, nexthop;
157da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1582d7192d6SDavid S. Miller 	struct rtable *rt;
1591da177e4SLinus Torvalds 	int err;
160f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1611da177e4SLinus Torvalds 
1621da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1631da177e4SLinus Torvalds 		return -EINVAL;
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1661da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
169f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
170f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
171f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1721da177e4SLinus Torvalds 		if (!daddr)
1731da177e4SLinus Torvalds 			return -EINVAL;
174f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1751da177e4SLinus Torvalds 	}
1761da177e4SLinus Torvalds 
177dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
178dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
179da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
180da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1811da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1821da177e4SLinus Torvalds 			      IPPROTO_TCP,
183abdf7e72SDavid S. Miller 			      orig_sport, orig_dport, sk, true);
184b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
185b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
186b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
1877c73a6faSPavel Emelyanov 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188b23dd4feSDavid S. Miller 		return err;
189584bdf8cSWei Dong 	}
1901da177e4SLinus Torvalds 
1911da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1921da177e4SLinus Torvalds 		ip_rt_put(rt);
1931da177e4SLinus Torvalds 		return -ENETUNREACH;
1941da177e4SLinus Torvalds 	}
1951da177e4SLinus Torvalds 
196f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
197da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1981da177e4SLinus Torvalds 
199c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
200da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
201c720c7e8SEric Dumazet 	inet->inet_rcv_saddr = inet->inet_saddr;
2021da177e4SLinus Torvalds 
203c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
2041da177e4SLinus Torvalds 		/* Reset inherited state */
2051da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
2061da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
207ee995283SPavel Emelyanov 		if (likely(!tp->repair))
2081da177e4SLinus Torvalds 			tp->write_seq	   = 0;
2091da177e4SLinus Torvalds 	}
2101da177e4SLinus Torvalds 
211295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
21281166dd6SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
21381166dd6SDavid S. Miller 		tcp_fetch_timewait_stamp(sk, &rt->dst);
2141da177e4SLinus Torvalds 
215c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
216c720c7e8SEric Dumazet 	inet->inet_daddr = daddr;
2171da177e4SLinus Torvalds 
218d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
219f6d8bd05SEric Dumazet 	if (inet_opt)
220f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2211da177e4SLinus Torvalds 
222bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2231da177e4SLinus Torvalds 
2241da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2251da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2261da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2271da177e4SLinus Torvalds 	 * complete initialization after this.
2281da177e4SLinus Torvalds 	 */
2291da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
230a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2311da177e4SLinus Torvalds 	if (err)
2321da177e4SLinus Torvalds 		goto failure;
2331da177e4SLinus Torvalds 
234da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
235c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
236b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
237b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
238b23dd4feSDavid S. Miller 		rt = NULL;
2391da177e4SLinus Torvalds 		goto failure;
240b23dd4feSDavid S. Miller 	}
2411da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
242bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
243d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2441da177e4SLinus Torvalds 
245ee995283SPavel Emelyanov 	if (!tp->write_seq && likely(!tp->repair))
246c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
247c720c7e8SEric Dumazet 							   inet->inet_daddr,
248c720c7e8SEric Dumazet 							   inet->inet_sport,
2491da177e4SLinus Torvalds 							   usin->sin_port);
2501da177e4SLinus Torvalds 
251c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2521da177e4SLinus Torvalds 
253ee995283SPavel Emelyanov 	if (likely(!tp->repair))
2541da177e4SLinus Torvalds 		err = tcp_connect(sk);
255ee995283SPavel Emelyanov 	else
256ee995283SPavel Emelyanov 		err = tcp_repair_connect(sk);
257ee995283SPavel Emelyanov 
2581da177e4SLinus Torvalds 	rt = NULL;
2591da177e4SLinus Torvalds 	if (err)
2601da177e4SLinus Torvalds 		goto failure;
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds 	return 0;
2631da177e4SLinus Torvalds 
2641da177e4SLinus Torvalds failure:
2657174259eSArnaldo Carvalho de Melo 	/*
2667174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2677174259eSArnaldo Carvalho de Melo 	 * if necessary.
2687174259eSArnaldo Carvalho de Melo 	 */
2691da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2701da177e4SLinus Torvalds 	ip_rt_put(rt);
2711da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
272c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2731da177e4SLinus Torvalds 	return err;
2741da177e4SLinus Torvalds }
2754bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds /*
278563d34d0SEric Dumazet  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
279563d34d0SEric Dumazet  * It can be called through tcp_release_cb() if socket was owned by user
280563d34d0SEric Dumazet  * at the time tcp_v4_err() was called to handle ICMP message.
2811da177e4SLinus Torvalds  */
282563d34d0SEric Dumazet static void tcp_v4_mtu_reduced(struct sock *sk)
2831da177e4SLinus Torvalds {
2841da177e4SLinus Torvalds 	struct dst_entry *dst;
2851da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
286563d34d0SEric Dumazet 	u32 mtu = tcp_sk(sk)->mtu_info;
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
2891da177e4SLinus Torvalds 	 * send out by Linux are always <576bytes so they should go through
2901da177e4SLinus Torvalds 	 * unfragmented).
2911da177e4SLinus Torvalds 	 */
2921da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN)
2931da177e4SLinus Torvalds 		return;
2941da177e4SLinus Torvalds 
29580d0a69fSDavid S. Miller 	dst = inet_csk_update_pmtu(sk, mtu);
29680d0a69fSDavid S. Miller 	if (!dst)
2971da177e4SLinus Torvalds 		return;
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
3001da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
3011da177e4SLinus Torvalds 	 */
3021da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
3031da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
3041da177e4SLinus Torvalds 
3051da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
3061da177e4SLinus Torvalds 
3071da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
308d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
3091da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
3101da177e4SLinus Torvalds 
3111da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
3121da177e4SLinus Torvalds 		 * clear that the old packet has been
3131da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
3141da177e4SLinus Torvalds 		 * discovery.
3151da177e4SLinus Torvalds 		 */
3161da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3171da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3181da177e4SLinus Torvalds }
3191da177e4SLinus Torvalds 
32055be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk)
32155be7a9cSDavid S. Miller {
32255be7a9cSDavid S. Miller 	struct dst_entry *dst = __sk_dst_check(sk, 0);
32355be7a9cSDavid S. Miller 
3241ed5c48fSDavid S. Miller 	if (dst)
3256700c270SDavid S. Miller 		dst->ops->redirect(dst, sk, skb);
32655be7a9cSDavid S. Miller }
32755be7a9cSDavid S. Miller 
3281da177e4SLinus Torvalds /*
3291da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3301da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3311da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3321da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3331da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3341da177e4SLinus Torvalds  * to find the appropriate port.
3351da177e4SLinus Torvalds  *
3361da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3371da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3381da177e4SLinus Torvalds  * and for some paths there is no check at all.
3391da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3401da177e4SLinus Torvalds  * is probably better.
3411da177e4SLinus Torvalds  *
3421da177e4SLinus Torvalds  */
3431da177e4SLinus Torvalds 
3444d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3451da177e4SLinus Torvalds {
346b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3474d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
348f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3491da177e4SLinus Torvalds 	struct tcp_sock *tp;
3501da177e4SLinus Torvalds 	struct inet_sock *inet;
3514d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3524d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3531da177e4SLinus Torvalds 	struct sock *sk;
354f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
355168a8f58SJerry Chu 	struct request_sock *req;
3561da177e4SLinus Torvalds 	__u32 seq;
357f1ecd5d9SDamian Lukowski 	__u32 remaining;
3581da177e4SLinus Torvalds 	int err;
3594d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3601da177e4SLinus Torvalds 
3614d1a2d9eSDamian Lukowski 	if (icmp_skb->len < (iph->ihl << 2) + 8) {
362dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3631da177e4SLinus Torvalds 		return;
3641da177e4SLinus Torvalds 	}
3651da177e4SLinus Torvalds 
366fd54d716SPavel Emelyanov 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
3674d1a2d9eSDamian Lukowski 			iph->saddr, th->source, inet_iif(icmp_skb));
3681da177e4SLinus Torvalds 	if (!sk) {
369dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3701da177e4SLinus Torvalds 		return;
3711da177e4SLinus Torvalds 	}
3721da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3739469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3741da177e4SLinus Torvalds 		return;
3751da177e4SLinus Torvalds 	}
3761da177e4SLinus Torvalds 
3771da177e4SLinus Torvalds 	bh_lock_sock(sk);
3781da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3791da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
380563d34d0SEric Dumazet 	 * We do take care of PMTU discovery (RFC1191) special case :
381563d34d0SEric Dumazet 	 * we can receive locally generated ICMP messages while socket is held.
3821da177e4SLinus Torvalds 	 */
383563d34d0SEric Dumazet 	if (sock_owned_by_user(sk) &&
384563d34d0SEric Dumazet 	    type != ICMP_DEST_UNREACH &&
385563d34d0SEric Dumazet 	    code != ICMP_FRAG_NEEDED)
386de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
3871da177e4SLinus Torvalds 
3881da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
3891da177e4SLinus Torvalds 		goto out;
3901da177e4SLinus Torvalds 
39197e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
39297e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
39397e3ecd1Sstephen hemminger 		goto out;
39497e3ecd1Sstephen hemminger 	}
39597e3ecd1Sstephen hemminger 
396f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
3971da177e4SLinus Torvalds 	tp = tcp_sk(sk);
398168a8f58SJerry Chu 	req = tp->fastopen_rsk;
3991da177e4SLinus Torvalds 	seq = ntohl(th->seq);
4001da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
401168a8f58SJerry Chu 	    !between(seq, tp->snd_una, tp->snd_nxt) &&
402168a8f58SJerry Chu 	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
403168a8f58SJerry Chu 		/* For a Fast Open socket, allow seq to be snt_isn. */
404de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4051da177e4SLinus Torvalds 		goto out;
4061da177e4SLinus Torvalds 	}
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds 	switch (type) {
40955be7a9cSDavid S. Miller 	case ICMP_REDIRECT:
41055be7a9cSDavid S. Miller 		do_redirect(icmp_skb, sk);
41155be7a9cSDavid S. Miller 		goto out;
4121da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
4131da177e4SLinus Torvalds 		/* Just silently ignore these. */
4141da177e4SLinus Torvalds 		goto out;
4151da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4161da177e4SLinus Torvalds 		err = EPROTO;
4171da177e4SLinus Torvalds 		break;
4181da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4191da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4201da177e4SLinus Torvalds 			goto out;
4211da177e4SLinus Torvalds 
4221da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
423563d34d0SEric Dumazet 			tp->mtu_info = info;
424144d56e9SEric Dumazet 			if (!sock_owned_by_user(sk)) {
425563d34d0SEric Dumazet 				tcp_v4_mtu_reduced(sk);
426144d56e9SEric Dumazet 			} else {
427144d56e9SEric Dumazet 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
428144d56e9SEric Dumazet 					sock_hold(sk);
429144d56e9SEric Dumazet 			}
4301da177e4SLinus Torvalds 			goto out;
4311da177e4SLinus Torvalds 		}
4321da177e4SLinus Torvalds 
4331da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
434f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
435f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
436f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
437f1ecd5d9SDamian Lukowski 			break;
438f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
439f1ecd5d9SDamian Lukowski 		    !icsk->icsk_backoff)
440f1ecd5d9SDamian Lukowski 			break;
441f1ecd5d9SDamian Lukowski 
442168a8f58SJerry Chu 		/* XXX (TFO) - revisit the following logic for TFO */
443168a8f58SJerry Chu 
4448f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4458f49c270SDavid S. Miller 			break;
4468f49c270SDavid S. Miller 
447f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
4489ad7c049SJerry Chu 		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
4499ad7c049SJerry Chu 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
450f1ecd5d9SDamian Lukowski 		tcp_bound_rto(sk);
451f1ecd5d9SDamian Lukowski 
452f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
453f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
454f1ecd5d9SDamian Lukowski 
455f1ecd5d9SDamian Lukowski 		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
456f1ecd5d9SDamian Lukowski 				tcp_time_stamp - TCP_SKB_CB(skb)->when);
457f1ecd5d9SDamian Lukowski 
458f1ecd5d9SDamian Lukowski 		if (remaining) {
459f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
460f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
461f1ecd5d9SDamian Lukowski 		} else {
462f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
463f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
464f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
465f1ecd5d9SDamian Lukowski 		}
466f1ecd5d9SDamian Lukowski 
4671da177e4SLinus Torvalds 		break;
4681da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4691da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4701da177e4SLinus Torvalds 		break;
4711da177e4SLinus Torvalds 	default:
4721da177e4SLinus Torvalds 		goto out;
4731da177e4SLinus Torvalds 	}
4741da177e4SLinus Torvalds 
475168a8f58SJerry Chu 	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
476168a8f58SJerry Chu 	 * than following the TCP_SYN_RECV case and closing the socket,
477168a8f58SJerry Chu 	 * we ignore the ICMP error and keep trying like a fully established
478168a8f58SJerry Chu 	 * socket. Is this the right thing to do?
479168a8f58SJerry Chu 	 */
480168a8f58SJerry Chu 	if (req && req->sk == NULL)
481168a8f58SJerry Chu 		goto out;
482168a8f58SJerry Chu 
4831da177e4SLinus Torvalds 	switch (sk->sk_state) {
48460236fddSArnaldo Carvalho de Melo 		struct request_sock *req, **prev;
4851da177e4SLinus Torvalds 	case TCP_LISTEN:
4861da177e4SLinus Torvalds 		if (sock_owned_by_user(sk))
4871da177e4SLinus Torvalds 			goto out;
4881da177e4SLinus Torvalds 
489463c84b9SArnaldo Carvalho de Melo 		req = inet_csk_search_req(sk, &prev, th->dest,
4901da177e4SLinus Torvalds 					  iph->daddr, iph->saddr);
4911da177e4SLinus Torvalds 		if (!req)
4921da177e4SLinus Torvalds 			goto out;
4931da177e4SLinus Torvalds 
4941da177e4SLinus Torvalds 		/* ICMPs are not backlogged, hence we cannot get
4951da177e4SLinus Torvalds 		   an established socket here.
4961da177e4SLinus Torvalds 		 */
497547b792cSIlpo Järvinen 		WARN_ON(req->sk);
4981da177e4SLinus Torvalds 
4992e6599cbSArnaldo Carvalho de Melo 		if (seq != tcp_rsk(req)->snt_isn) {
500de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
5011da177e4SLinus Torvalds 			goto out;
5021da177e4SLinus Torvalds 		}
5031da177e4SLinus Torvalds 
5041da177e4SLinus Torvalds 		/*
5051da177e4SLinus Torvalds 		 * Still in SYN_RECV, just remove it silently.
5061da177e4SLinus Torvalds 		 * There is no good way to pass the error to the newly
5071da177e4SLinus Torvalds 		 * created socket, and POSIX does not want network
5081da177e4SLinus Torvalds 		 * errors returned from accept().
5091da177e4SLinus Torvalds 		 */
510463c84b9SArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_drop(sk, req, prev);
5111da177e4SLinus Torvalds 		goto out;
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds 	case TCP_SYN_SENT:
5141da177e4SLinus Torvalds 	case TCP_SYN_RECV:  /* Cannot happen.
515168a8f58SJerry Chu 			       It can f.e. if SYNs crossed,
516168a8f58SJerry Chu 			       or Fast Open.
5171da177e4SLinus Torvalds 			     */
5181da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
5191da177e4SLinus Torvalds 			sk->sk_err = err;
5201da177e4SLinus Torvalds 
5211da177e4SLinus Torvalds 			sk->sk_error_report(sk);
5221da177e4SLinus Torvalds 
5231da177e4SLinus Torvalds 			tcp_done(sk);
5241da177e4SLinus Torvalds 		} else {
5251da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5261da177e4SLinus Torvalds 		}
5271da177e4SLinus Torvalds 		goto out;
5281da177e4SLinus Torvalds 	}
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5311da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5321da177e4SLinus Torvalds 	 *
5331da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5341da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5351da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5361da177e4SLinus Torvalds 	 *
5371da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5381da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5391da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5401da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5411da177e4SLinus Torvalds 	 *
5421da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5431da177e4SLinus Torvalds 	 *							--ANK (980905)
5441da177e4SLinus Torvalds 	 */
5451da177e4SLinus Torvalds 
5461da177e4SLinus Torvalds 	inet = inet_sk(sk);
5471da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5481da177e4SLinus Torvalds 		sk->sk_err = err;
5491da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5501da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5511da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5521da177e4SLinus Torvalds 	}
5531da177e4SLinus Torvalds 
5541da177e4SLinus Torvalds out:
5551da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5561da177e4SLinus Torvalds 	sock_put(sk);
5571da177e4SLinus Torvalds }
5581da177e4SLinus Torvalds 
559419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb,
560419f9f89SHerbert Xu 				__be32 saddr, __be32 daddr)
5611da177e4SLinus Torvalds {
562aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5631da177e4SLinus Torvalds 
56484fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
565419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
566663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
567ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5681da177e4SLinus Torvalds 	} else {
569419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
57007f0757aSJoe Perches 					 csum_partial(th,
5711da177e4SLinus Torvalds 						      th->doff << 2,
5721da177e4SLinus Torvalds 						      skb->csum));
5731da177e4SLinus Torvalds 	}
5741da177e4SLinus Torvalds }
5751da177e4SLinus Torvalds 
576419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
577bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
578419f9f89SHerbert Xu {
579cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
580419f9f89SHerbert Xu 
581419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
582419f9f89SHerbert Xu }
5834bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
584419f9f89SHerbert Xu 
585a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb)
586a430a43dSHerbert Xu {
587eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
588a430a43dSHerbert Xu 	struct tcphdr *th;
589a430a43dSHerbert Xu 
590a430a43dSHerbert Xu 	if (!pskb_may_pull(skb, sizeof(*th)))
591a430a43dSHerbert Xu 		return -EINVAL;
592a430a43dSHerbert Xu 
593eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
594aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
595a430a43dSHerbert Xu 
596a430a43dSHerbert Xu 	th->check = 0;
59784fa7933SPatrick McHardy 	skb->ip_summed = CHECKSUM_PARTIAL;
598419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
599a430a43dSHerbert Xu 	return 0;
600a430a43dSHerbert Xu }
601a430a43dSHerbert Xu 
6021da177e4SLinus Torvalds /*
6031da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
6041da177e4SLinus Torvalds  *
6051da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
6061da177e4SLinus Torvalds  *		      for reset.
6071da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
6081da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
6091da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
6101da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
6111da177e4SLinus Torvalds  *		arrived with segment.
6121da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
6131da177e4SLinus Torvalds  */
6141da177e4SLinus Torvalds 
615cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
6161da177e4SLinus Torvalds {
617cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
618cfb6eeb4SYOSHIFUJI Hideaki 	struct {
619cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
620cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
621714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
622cfb6eeb4SYOSHIFUJI Hideaki #endif
623cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
6241da177e4SLinus Torvalds 	struct ip_reply_arg arg;
625cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
626cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
627658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
628658ddaafSShawn Lu 	unsigned char newhash[16];
629658ddaafSShawn Lu 	int genhash;
630658ddaafSShawn Lu 	struct sock *sk1 = NULL;
631cfb6eeb4SYOSHIFUJI Hideaki #endif
632a86b1e30SPavel Emelyanov 	struct net *net;
6331da177e4SLinus Torvalds 
6341da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
6351da177e4SLinus Torvalds 	if (th->rst)
6361da177e4SLinus Torvalds 		return;
6371da177e4SLinus Torvalds 
638511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
6391da177e4SLinus Torvalds 		return;
6401da177e4SLinus Torvalds 
6411da177e4SLinus Torvalds 	/* Swap the send and the receive. */
642cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
643cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
644cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
645cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
646cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds 	if (th->ack) {
649cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6501da177e4SLinus Torvalds 	} else {
651cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
652cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6531da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6541da177e4SLinus Torvalds 	}
6551da177e4SLinus Torvalds 
6567174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
657cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
658cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
659cfb6eeb4SYOSHIFUJI Hideaki 
660cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
661658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
662658ddaafSShawn Lu 	if (!sk && hash_location) {
663658ddaafSShawn Lu 		/*
664658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
665658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
666658ddaafSShawn Lu 		 * we are not loose security here:
667658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
668658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
669658ddaafSShawn Lu 		 */
670658ddaafSShawn Lu 		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
671658ddaafSShawn Lu 					     &tcp_hashinfo, ip_hdr(skb)->daddr,
672658ddaafSShawn Lu 					     ntohs(th->source), inet_iif(skb));
673658ddaafSShawn Lu 		/* don't send rst if it can't find key */
674658ddaafSShawn Lu 		if (!sk1)
675658ddaafSShawn Lu 			return;
676658ddaafSShawn Lu 		rcu_read_lock();
677658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
678658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
679658ddaafSShawn Lu 		if (!key)
680658ddaafSShawn Lu 			goto release_sk1;
681658ddaafSShawn Lu 
682658ddaafSShawn Lu 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
683658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
684658ddaafSShawn Lu 			goto release_sk1;
685658ddaafSShawn Lu 	} else {
686658ddaafSShawn Lu 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
687658ddaafSShawn Lu 					     &ip_hdr(skb)->saddr,
688a915da9bSEric Dumazet 					     AF_INET) : NULL;
689658ddaafSShawn Lu 	}
690658ddaafSShawn Lu 
691cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
692cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
693cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
694cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
695cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
696cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
697cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
698cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
699cfb6eeb4SYOSHIFUJI Hideaki 
70049a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
70178e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
70278e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
703cfb6eeb4SYOSHIFUJI Hideaki 	}
704cfb6eeb4SYOSHIFUJI Hideaki #endif
705eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
706eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
70752cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7081da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
70988ef4a5aSKOVACS Krisztian 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
710e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
7114c675258SAlexey Kuznetsov 	 * routing might fail in this case. No choice here, if we choose to force
7124c675258SAlexey Kuznetsov 	 * input interface, we will misroute in case of asymmetric route.
713e2446eaaSShawn Lu 	 */
7144c675258SAlexey Kuznetsov 	if (sk)
7154c675258SAlexey Kuznetsov 		arg.bound_dev_if = sk->sk_bound_dev_if;
7161da177e4SLinus Torvalds 
717adf30907SEric Dumazet 	net = dev_net(skb_dst(skb)->dev);
71866b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
719be9f4a44SEric Dumazet 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
72070e73416SDavid S. Miller 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
7211da177e4SLinus Torvalds 
72263231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
72363231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
724658ddaafSShawn Lu 
725658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
726658ddaafSShawn Lu release_sk1:
727658ddaafSShawn Lu 	if (sk1) {
728658ddaafSShawn Lu 		rcu_read_unlock();
729658ddaafSShawn Lu 		sock_put(sk1);
730658ddaafSShawn Lu 	}
731658ddaafSShawn Lu #endif
7321da177e4SLinus Torvalds }
7331da177e4SLinus Torvalds 
7341da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7351da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7361da177e4SLinus Torvalds  */
7371da177e4SLinus Torvalds 
7389501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
7399501f972SYOSHIFUJI Hideaki 			    u32 win, u32 ts, int oif,
74088ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
74166b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7421da177e4SLinus Torvalds {
743cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7441da177e4SLinus Torvalds 	struct {
7451da177e4SLinus Torvalds 		struct tcphdr th;
746714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
747cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
748cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
749cfb6eeb4SYOSHIFUJI Hideaki #endif
750cfb6eeb4SYOSHIFUJI Hideaki 			];
7511da177e4SLinus Torvalds 	} rep;
7521da177e4SLinus Torvalds 	struct ip_reply_arg arg;
753adf30907SEric Dumazet 	struct net *net = dev_net(skb_dst(skb)->dev);
7541da177e4SLinus Torvalds 
7551da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7567174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7571da177e4SLinus Torvalds 
7581da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7591da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
7601da177e4SLinus Torvalds 	if (ts) {
761cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7621da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7631da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
764cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[1] = htonl(tcp_time_stamp);
765cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[2] = htonl(ts);
766cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7671da177e4SLinus Torvalds 	}
7681da177e4SLinus Torvalds 
7691da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7701da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7711da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7721da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7731da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7741da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7751da177e4SLinus Torvalds 	rep.th.ack     = 1;
7761da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7771da177e4SLinus Torvalds 
778cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
779cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
780cfb6eeb4SYOSHIFUJI Hideaki 		int offset = (ts) ? 3 : 0;
781cfb6eeb4SYOSHIFUJI Hideaki 
782cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
783cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
784cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
785cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
786cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
787cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
788cfb6eeb4SYOSHIFUJI Hideaki 
78949a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
79090b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
79190b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
792cfb6eeb4SYOSHIFUJI Hideaki 	}
793cfb6eeb4SYOSHIFUJI Hideaki #endif
79488ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
795eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
796eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7971da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7981da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7999501f972SYOSHIFUJI Hideaki 	if (oif)
8009501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
80166b13d99SEric Dumazet 	arg.tos = tos;
802be9f4a44SEric Dumazet 	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
80370e73416SDavid S. Miller 			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
8041da177e4SLinus Torvalds 
80563231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
8061da177e4SLinus Torvalds }
8071da177e4SLinus Torvalds 
8081da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
8091da177e4SLinus Torvalds {
8108feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
811cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
8121da177e4SLinus Torvalds 
8139501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
8147174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
8159501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
8169501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
81788ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
81866b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
81966b13d99SEric Dumazet 			tw->tw_tos
8209501f972SYOSHIFUJI Hideaki 			);
8211da177e4SLinus Torvalds 
8228feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
8231da177e4SLinus Torvalds }
8241da177e4SLinus Torvalds 
8256edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
8267174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
8271da177e4SLinus Torvalds {
828168a8f58SJerry Chu 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
829168a8f58SJerry Chu 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
830168a8f58SJerry Chu 	 */
831168a8f58SJerry Chu 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
832168a8f58SJerry Chu 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
833168a8f58SJerry Chu 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
8349501f972SYOSHIFUJI Hideaki 			req->ts_recent,
8359501f972SYOSHIFUJI Hideaki 			0,
836a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
837a915da9bSEric Dumazet 					  AF_INET),
83866b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
83966b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8401da177e4SLinus Torvalds }
8411da177e4SLinus Torvalds 
8421da177e4SLinus Torvalds /*
8439bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
84460236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8451da177e4SLinus Torvalds  *	socket.
8461da177e4SLinus Torvalds  */
84772659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
848e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
849fff32699SEric Dumazet 			      struct request_values *rvp,
8507586ecebSEric Dumazet 			      u16 queue_mapping,
8517586ecebSEric Dumazet 			      bool nocache)
8521da177e4SLinus Torvalds {
8532e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8546bd023f3SDavid S. Miller 	struct flowi4 fl4;
8551da177e4SLinus Torvalds 	int err = -1;
8561da177e4SLinus Torvalds 	struct sk_buff * skb;
8571da177e4SLinus Torvalds 
8581da177e4SLinus Torvalds 	/* First, grab a route. */
859ba3f7f04SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
860fd80eb94SDenis V. Lunev 		return -1;
8611da177e4SLinus Torvalds 
8628336886fSJerry Chu 	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
8631da177e4SLinus Torvalds 
8641da177e4SLinus Torvalds 	if (skb) {
865419f9f89SHerbert Xu 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
8661da177e4SLinus Torvalds 
867fff32699SEric Dumazet 		skb_set_queue_mapping(skb, queue_mapping);
8682e6599cbSArnaldo Carvalho de Melo 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
8692e6599cbSArnaldo Carvalho de Melo 					    ireq->rmt_addr,
8702e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
871b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
872016818d0SNeal Cardwell 		if (!tcp_rsk(req)->snt_synack && !err)
873016818d0SNeal Cardwell 			tcp_rsk(req)->snt_synack = tcp_time_stamp;
8741da177e4SLinus Torvalds 	}
8751da177e4SLinus Torvalds 
8761da177e4SLinus Torvalds 	return err;
8771da177e4SLinus Torvalds }
8781da177e4SLinus Torvalds 
87972659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
880e6b4d113SWilliam Allen Simpson 			      struct request_values *rvp)
881fd80eb94SDenis V. Lunev {
88272659eccSOctavian Purdila 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
8837586ecebSEric Dumazet 	return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
884fd80eb94SDenis V. Lunev }
885fd80eb94SDenis V. Lunev 
8861da177e4SLinus Torvalds /*
88760236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8881da177e4SLinus Torvalds  */
88960236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8901da177e4SLinus Torvalds {
8912e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8921da177e4SLinus Torvalds }
8931da177e4SLinus Torvalds 
894946cedccSEric Dumazet /*
895a2a385d6SEric Dumazet  * Return true if a syncookie should be sent
896946cedccSEric Dumazet  */
897a2a385d6SEric Dumazet bool tcp_syn_flood_action(struct sock *sk,
898946cedccSEric Dumazet 			 const struct sk_buff *skb,
899946cedccSEric Dumazet 			 const char *proto)
9001da177e4SLinus Torvalds {
901946cedccSEric Dumazet 	const char *msg = "Dropping request";
902a2a385d6SEric Dumazet 	bool want_cookie = false;
903946cedccSEric Dumazet 	struct listen_sock *lopt;
904946cedccSEric Dumazet 
905946cedccSEric Dumazet 
9061da177e4SLinus Torvalds 
9072a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES
908946cedccSEric Dumazet 	if (sysctl_tcp_syncookies) {
9092a1d4bd4SFlorian Westphal 		msg = "Sending cookies";
910a2a385d6SEric Dumazet 		want_cookie = true;
911946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
912946cedccSEric Dumazet 	} else
91380e40daaSArnaldo Carvalho de Melo #endif
914946cedccSEric Dumazet 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
9152a1d4bd4SFlorian Westphal 
916946cedccSEric Dumazet 	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
917946cedccSEric Dumazet 	if (!lopt->synflood_warned) {
918946cedccSEric Dumazet 		lopt->synflood_warned = 1;
919afd46503SJoe Perches 		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
920946cedccSEric Dumazet 			proto, ntohs(tcp_hdr(skb)->dest), msg);
9212a1d4bd4SFlorian Westphal 	}
922946cedccSEric Dumazet 	return want_cookie;
923946cedccSEric Dumazet }
924946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action);
9251da177e4SLinus Torvalds 
9261da177e4SLinus Torvalds /*
92760236fddSArnaldo Carvalho de Melo  * Save and compile IPv4 options into the request_sock if needed.
9281da177e4SLinus Torvalds  */
9295dff747bSChristoph Paasch static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
9301da177e4SLinus Torvalds {
931f6d8bd05SEric Dumazet 	const struct ip_options *opt = &(IPCB(skb)->opt);
932f6d8bd05SEric Dumazet 	struct ip_options_rcu *dopt = NULL;
9331da177e4SLinus Torvalds 
9341da177e4SLinus Torvalds 	if (opt && opt->optlen) {
935f6d8bd05SEric Dumazet 		int opt_size = sizeof(*dopt) + opt->optlen;
936f6d8bd05SEric Dumazet 
9371da177e4SLinus Torvalds 		dopt = kmalloc(opt_size, GFP_ATOMIC);
9381da177e4SLinus Torvalds 		if (dopt) {
939f6d8bd05SEric Dumazet 			if (ip_options_echo(&dopt->opt, skb)) {
9401da177e4SLinus Torvalds 				kfree(dopt);
9411da177e4SLinus Torvalds 				dopt = NULL;
9421da177e4SLinus Torvalds 			}
9431da177e4SLinus Torvalds 		}
9441da177e4SLinus Torvalds 	}
9451da177e4SLinus Torvalds 	return dopt;
9461da177e4SLinus Torvalds }
9471da177e4SLinus Torvalds 
948cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
949cfb6eeb4SYOSHIFUJI Hideaki /*
950cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
951cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
952cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
953cfb6eeb4SYOSHIFUJI Hideaki  */
954cfb6eeb4SYOSHIFUJI Hideaki 
955cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
956a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
957a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
958a915da9bSEric Dumazet 					 int family)
959cfb6eeb4SYOSHIFUJI Hideaki {
960cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
961a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
962a915da9bSEric Dumazet 	struct hlist_node *pos;
963a915da9bSEric Dumazet 	unsigned int size = sizeof(struct in_addr);
964a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
965cfb6eeb4SYOSHIFUJI Hideaki 
966a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
967a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
968b4fb05eaSEric Dumazet 				       sock_owned_by_user(sk) ||
969b4fb05eaSEric Dumazet 				       lockdep_is_held(&sk->sk_lock.slock));
970a8afca03SEric Dumazet 	if (!md5sig)
971cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
972a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
973a915da9bSEric Dumazet 	if (family == AF_INET6)
974a915da9bSEric Dumazet 		size = sizeof(struct in6_addr);
975a915da9bSEric Dumazet #endif
976a8afca03SEric Dumazet 	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
977a915da9bSEric Dumazet 		if (key->family != family)
978a915da9bSEric Dumazet 			continue;
979a915da9bSEric Dumazet 		if (!memcmp(&key->addr, addr, size))
980a915da9bSEric Dumazet 			return key;
981cfb6eeb4SYOSHIFUJI Hideaki 	}
982cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
983cfb6eeb4SYOSHIFUJI Hideaki }
984a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup);
985cfb6eeb4SYOSHIFUJI Hideaki 
986cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
987cfb6eeb4SYOSHIFUJI Hideaki 					 struct sock *addr_sk)
988cfb6eeb4SYOSHIFUJI Hideaki {
989a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
990a915da9bSEric Dumazet 
991a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
992a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
993cfb6eeb4SYOSHIFUJI Hideaki }
994cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
995cfb6eeb4SYOSHIFUJI Hideaki 
996f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
997cfb6eeb4SYOSHIFUJI Hideaki 						      struct request_sock *req)
998cfb6eeb4SYOSHIFUJI Hideaki {
999a915da9bSEric Dumazet 	union tcp_md5_addr *addr;
1000a915da9bSEric Dumazet 
1001a915da9bSEric Dumazet 	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
1002a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
1003cfb6eeb4SYOSHIFUJI Hideaki }
1004cfb6eeb4SYOSHIFUJI Hideaki 
1005cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
1006a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
1007a915da9bSEric Dumazet 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
1008cfb6eeb4SYOSHIFUJI Hideaki {
1009cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
1010b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
1011cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1012f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
1013f6685938SArnaldo Carvalho de Melo 
1014a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1015a915da9bSEric Dumazet 	if (key) {
1016a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
1017a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
1018a915da9bSEric Dumazet 		key->keylen = newkeylen;
1019a915da9bSEric Dumazet 		return 0;
1020cfb6eeb4SYOSHIFUJI Hideaki 	}
1021260fcbebSYan, Zheng 
1022a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1023a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
1024a915da9bSEric Dumazet 	if (!md5sig) {
1025a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
1026a915da9bSEric Dumazet 		if (!md5sig)
1027a915da9bSEric Dumazet 			return -ENOMEM;
1028a915da9bSEric Dumazet 
1029a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1030a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
1031a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
1032a915da9bSEric Dumazet 	}
1033a915da9bSEric Dumazet 
10345f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
1035a915da9bSEric Dumazet 	if (!key)
1036a915da9bSEric Dumazet 		return -ENOMEM;
1037a915da9bSEric Dumazet 	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
10385f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
1039cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
1040cfb6eeb4SYOSHIFUJI Hideaki 	}
1041f6685938SArnaldo Carvalho de Melo 
1042a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
1043a915da9bSEric Dumazet 	key->keylen = newkeylen;
1044a915da9bSEric Dumazet 	key->family = family;
1045a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
1046a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1047a915da9bSEric Dumazet 				      sizeof(struct in_addr));
1048a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
1049cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1050cfb6eeb4SYOSHIFUJI Hideaki }
1051a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
1052cfb6eeb4SYOSHIFUJI Hideaki 
1053a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1054cfb6eeb4SYOSHIFUJI Hideaki {
1055cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1056a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1057a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1058cfb6eeb4SYOSHIFUJI Hideaki 
1059a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1060a915da9bSEric Dumazet 	if (!key)
1061cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
1062a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
10635f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1064a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
1065a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
1066a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
1067a8afca03SEric Dumazet 	if (hlist_empty(&md5sig->head))
1068a915da9bSEric Dumazet 		tcp_free_md5sig_pool();
1069a915da9bSEric Dumazet 	return 0;
1070cfb6eeb4SYOSHIFUJI Hideaki }
1071a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
1072cfb6eeb4SYOSHIFUJI Hideaki 
1073*e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk)
1074cfb6eeb4SYOSHIFUJI Hideaki {
1075cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1076a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1077a915da9bSEric Dumazet 	struct hlist_node *pos, *n;
1078a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1079cfb6eeb4SYOSHIFUJI Hideaki 
1080a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1081a8afca03SEric Dumazet 
1082a8afca03SEric Dumazet 	if (!hlist_empty(&md5sig->head))
1083cfb6eeb4SYOSHIFUJI Hideaki 		tcp_free_md5sig_pool();
1084a8afca03SEric Dumazet 	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1085a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
10865f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1087a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
1088cfb6eeb4SYOSHIFUJI Hideaki 	}
1089cfb6eeb4SYOSHIFUJI Hideaki }
1090cfb6eeb4SYOSHIFUJI Hideaki 
1091cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1092cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
1093cfb6eeb4SYOSHIFUJI Hideaki {
1094cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
1095cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1096cfb6eeb4SYOSHIFUJI Hideaki 
1097cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
1098cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1099cfb6eeb4SYOSHIFUJI Hideaki 
1100cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1101cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1102cfb6eeb4SYOSHIFUJI Hideaki 
1103cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1104cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1105cfb6eeb4SYOSHIFUJI Hideaki 
1106a8afca03SEric Dumazet 	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1107a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1108a915da9bSEric Dumazet 				      AF_INET);
1109cfb6eeb4SYOSHIFUJI Hideaki 
1110cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1111cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1112cfb6eeb4SYOSHIFUJI Hideaki 
1113a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1114a915da9bSEric Dumazet 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1115a915da9bSEric Dumazet 			      GFP_KERNEL);
1116cfb6eeb4SYOSHIFUJI Hideaki }
1117cfb6eeb4SYOSHIFUJI Hideaki 
111849a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
111949a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1120cfb6eeb4SYOSHIFUJI Hideaki {
1121cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
112249a72dfbSAdam Langley 	struct scatterlist sg;
1123cfb6eeb4SYOSHIFUJI Hideaki 
1124cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1125cfb6eeb4SYOSHIFUJI Hideaki 
1126cfb6eeb4SYOSHIFUJI Hideaki 	/*
112749a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1128cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1129cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1130cfb6eeb4SYOSHIFUJI Hideaki 	 */
1131cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1132cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1133cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1134076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
113549a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1136c7da57a1SDavid S. Miller 
113749a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
113849a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
113949a72dfbSAdam Langley }
114049a72dfbSAdam Langley 
1141a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1142318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
114349a72dfbSAdam Langley {
114449a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
114549a72dfbSAdam Langley 	struct hash_desc *desc;
114649a72dfbSAdam Langley 
114749a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
114849a72dfbSAdam Langley 	if (!hp)
114949a72dfbSAdam Langley 		goto clear_hash_noput;
115049a72dfbSAdam Langley 	desc = &hp->md5_desc;
115149a72dfbSAdam Langley 
115249a72dfbSAdam Langley 	if (crypto_hash_init(desc))
115349a72dfbSAdam Langley 		goto clear_hash;
115449a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
115549a72dfbSAdam Langley 		goto clear_hash;
115649a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
115749a72dfbSAdam Langley 		goto clear_hash;
115849a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
115949a72dfbSAdam Langley 		goto clear_hash;
116049a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1161cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1162cfb6eeb4SYOSHIFUJI Hideaki 
1163cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1164cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
116549a72dfbSAdam Langley 
1166cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1167cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1168cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1169cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
117049a72dfbSAdam Langley 	return 1;
1171cfb6eeb4SYOSHIFUJI Hideaki }
1172cfb6eeb4SYOSHIFUJI Hideaki 
117349a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1174318cf7aaSEric Dumazet 			const struct sock *sk, const struct request_sock *req,
1175318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1176cfb6eeb4SYOSHIFUJI Hideaki {
117749a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
117849a72dfbSAdam Langley 	struct hash_desc *desc;
1179318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1180cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1181cfb6eeb4SYOSHIFUJI Hideaki 
1182cfb6eeb4SYOSHIFUJI Hideaki 	if (sk) {
1183c720c7e8SEric Dumazet 		saddr = inet_sk(sk)->inet_saddr;
1184c720c7e8SEric Dumazet 		daddr = inet_sk(sk)->inet_daddr;
118549a72dfbSAdam Langley 	} else if (req) {
118649a72dfbSAdam Langley 		saddr = inet_rsk(req)->loc_addr;
118749a72dfbSAdam Langley 		daddr = inet_rsk(req)->rmt_addr;
1188cfb6eeb4SYOSHIFUJI Hideaki 	} else {
118949a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
119049a72dfbSAdam Langley 		saddr = iph->saddr;
119149a72dfbSAdam Langley 		daddr = iph->daddr;
1192cfb6eeb4SYOSHIFUJI Hideaki 	}
1193cfb6eeb4SYOSHIFUJI Hideaki 
119449a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
119549a72dfbSAdam Langley 	if (!hp)
119649a72dfbSAdam Langley 		goto clear_hash_noput;
119749a72dfbSAdam Langley 	desc = &hp->md5_desc;
119849a72dfbSAdam Langley 
119949a72dfbSAdam Langley 	if (crypto_hash_init(desc))
120049a72dfbSAdam Langley 		goto clear_hash;
120149a72dfbSAdam Langley 
120249a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
120349a72dfbSAdam Langley 		goto clear_hash;
120449a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
120549a72dfbSAdam Langley 		goto clear_hash;
120649a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
120749a72dfbSAdam Langley 		goto clear_hash;
120849a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
120949a72dfbSAdam Langley 		goto clear_hash;
121049a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
121149a72dfbSAdam Langley 		goto clear_hash;
121249a72dfbSAdam Langley 
121349a72dfbSAdam Langley 	tcp_put_md5sig_pool();
121449a72dfbSAdam Langley 	return 0;
121549a72dfbSAdam Langley 
121649a72dfbSAdam Langley clear_hash:
121749a72dfbSAdam Langley 	tcp_put_md5sig_pool();
121849a72dfbSAdam Langley clear_hash_noput:
121949a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
122049a72dfbSAdam Langley 	return 1;
122149a72dfbSAdam Langley }
122249a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1223cfb6eeb4SYOSHIFUJI Hideaki 
1224a2a385d6SEric Dumazet static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1225cfb6eeb4SYOSHIFUJI Hideaki {
1226cfb6eeb4SYOSHIFUJI Hideaki 	/*
1227cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1228cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1229cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1230cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1231cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1232cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1233cfb6eeb4SYOSHIFUJI Hideaki 	 */
1234cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1235cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1236eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1237cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1238cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1239cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1240cfb6eeb4SYOSHIFUJI Hideaki 
1241a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1242a915da9bSEric Dumazet 					  AF_INET);
12437d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1244cfb6eeb4SYOSHIFUJI Hideaki 
1245cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1246cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1247a2a385d6SEric Dumazet 		return false;
1248cfb6eeb4SYOSHIFUJI Hideaki 
1249cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1250785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1251a2a385d6SEric Dumazet 		return true;
1252cfb6eeb4SYOSHIFUJI Hideaki 	}
1253cfb6eeb4SYOSHIFUJI Hideaki 
1254cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1255785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1256a2a385d6SEric Dumazet 		return true;
1257cfb6eeb4SYOSHIFUJI Hideaki 	}
1258cfb6eeb4SYOSHIFUJI Hideaki 
1259cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1260cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1261cfb6eeb4SYOSHIFUJI Hideaki 	 */
126249a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1263cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
126449a72dfbSAdam Langley 				      NULL, NULL, skb);
1265cfb6eeb4SYOSHIFUJI Hideaki 
1266cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1267e87cc472SJoe Perches 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1268673d57e7SHarvey Harrison 				     &iph->saddr, ntohs(th->source),
1269673d57e7SHarvey Harrison 				     &iph->daddr, ntohs(th->dest),
1270e87cc472SJoe Perches 				     genhash ? " tcp_v4_calc_md5_hash failed"
1271e87cc472SJoe Perches 				     : "");
1272a2a385d6SEric Dumazet 		return true;
1273cfb6eeb4SYOSHIFUJI Hideaki 	}
1274a2a385d6SEric Dumazet 	return false;
1275cfb6eeb4SYOSHIFUJI Hideaki }
1276cfb6eeb4SYOSHIFUJI Hideaki 
1277cfb6eeb4SYOSHIFUJI Hideaki #endif
1278cfb6eeb4SYOSHIFUJI Hideaki 
127972a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12801da177e4SLinus Torvalds 	.family		=	PF_INET,
12812e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
128272659eccSOctavian Purdila 	.rtx_syn_ack	=	tcp_v4_rtx_synack,
128360236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
128460236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12851da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
128672659eccSOctavian Purdila 	.syn_ack_timeout = 	tcp_syn_ack_timeout,
12871da177e4SLinus Torvalds };
12881da177e4SLinus Torvalds 
1289cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1290b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1291cfb6eeb4SYOSHIFUJI Hideaki 	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1292e3afe7b7SJohn Dykstra 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1293cfb6eeb4SYOSHIFUJI Hideaki };
1294b6332e6cSAndrew Morton #endif
1295cfb6eeb4SYOSHIFUJI Hideaki 
1296168a8f58SJerry Chu static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1297168a8f58SJerry Chu 			       struct request_sock *req,
1298168a8f58SJerry Chu 			       struct tcp_fastopen_cookie *foc,
1299168a8f58SJerry Chu 			       struct tcp_fastopen_cookie *valid_foc)
1300168a8f58SJerry Chu {
1301168a8f58SJerry Chu 	bool skip_cookie = false;
1302168a8f58SJerry Chu 	struct fastopen_queue *fastopenq;
1303168a8f58SJerry Chu 
1304168a8f58SJerry Chu 	if (likely(!fastopen_cookie_present(foc))) {
1305168a8f58SJerry Chu 		/* See include/net/tcp.h for the meaning of these knobs */
1306168a8f58SJerry Chu 		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
1307168a8f58SJerry Chu 		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
1308168a8f58SJerry Chu 		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
1309168a8f58SJerry Chu 			skip_cookie = true; /* no cookie to validate */
1310168a8f58SJerry Chu 		else
1311168a8f58SJerry Chu 			return false;
1312168a8f58SJerry Chu 	}
1313168a8f58SJerry Chu 	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
1314168a8f58SJerry Chu 	/* A FO option is present; bump the counter. */
1315168a8f58SJerry Chu 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
1316168a8f58SJerry Chu 
1317168a8f58SJerry Chu 	/* Make sure the listener has enabled fastopen, and we don't
1318168a8f58SJerry Chu 	 * exceed the max # of pending TFO requests allowed before trying
1319168a8f58SJerry Chu 	 * to validating the cookie in order to avoid burning CPU cycles
1320168a8f58SJerry Chu 	 * unnecessarily.
1321168a8f58SJerry Chu 	 *
1322168a8f58SJerry Chu 	 * XXX (TFO) - The implication of checking the max_qlen before
1323168a8f58SJerry Chu 	 * processing a cookie request is that clients can't differentiate
1324168a8f58SJerry Chu 	 * between qlen overflow causing Fast Open to be disabled
1325168a8f58SJerry Chu 	 * temporarily vs a server not supporting Fast Open at all.
1326168a8f58SJerry Chu 	 */
1327168a8f58SJerry Chu 	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
1328168a8f58SJerry Chu 	    fastopenq == NULL || fastopenq->max_qlen == 0)
1329168a8f58SJerry Chu 		return false;
1330168a8f58SJerry Chu 
1331168a8f58SJerry Chu 	if (fastopenq->qlen >= fastopenq->max_qlen) {
1332168a8f58SJerry Chu 		struct request_sock *req1;
1333168a8f58SJerry Chu 		spin_lock(&fastopenq->lock);
1334168a8f58SJerry Chu 		req1 = fastopenq->rskq_rst_head;
1335168a8f58SJerry Chu 		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
1336168a8f58SJerry Chu 			spin_unlock(&fastopenq->lock);
1337168a8f58SJerry Chu 			NET_INC_STATS_BH(sock_net(sk),
1338168a8f58SJerry Chu 			    LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
1339168a8f58SJerry Chu 			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
1340168a8f58SJerry Chu 			foc->len = -1;
1341168a8f58SJerry Chu 			return false;
1342168a8f58SJerry Chu 		}
1343168a8f58SJerry Chu 		fastopenq->rskq_rst_head = req1->dl_next;
1344168a8f58SJerry Chu 		fastopenq->qlen--;
1345168a8f58SJerry Chu 		spin_unlock(&fastopenq->lock);
1346168a8f58SJerry Chu 		reqsk_free(req1);
1347168a8f58SJerry Chu 	}
1348168a8f58SJerry Chu 	if (skip_cookie) {
1349168a8f58SJerry Chu 		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1350168a8f58SJerry Chu 		return true;
1351168a8f58SJerry Chu 	}
1352168a8f58SJerry Chu 	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
1353168a8f58SJerry Chu 		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
1354168a8f58SJerry Chu 			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1355168a8f58SJerry Chu 			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
1356168a8f58SJerry Chu 			    memcmp(&foc->val[0], &valid_foc->val[0],
1357168a8f58SJerry Chu 			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
1358168a8f58SJerry Chu 				return false;
1359168a8f58SJerry Chu 			valid_foc->len = -1;
1360168a8f58SJerry Chu 		}
1361168a8f58SJerry Chu 		/* Acknowledge the data received from the peer. */
1362168a8f58SJerry Chu 		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1363168a8f58SJerry Chu 		return true;
1364168a8f58SJerry Chu 	} else if (foc->len == 0) { /* Client requesting a cookie */
1365168a8f58SJerry Chu 		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1366168a8f58SJerry Chu 		NET_INC_STATS_BH(sock_net(sk),
1367168a8f58SJerry Chu 		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
1368168a8f58SJerry Chu 	} else {
1369168a8f58SJerry Chu 		/* Client sent a cookie with wrong size. Treat it
1370168a8f58SJerry Chu 		 * the same as invalid and return a valid one.
1371168a8f58SJerry Chu 		 */
1372168a8f58SJerry Chu 		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
1373168a8f58SJerry Chu 	}
1374168a8f58SJerry Chu 	return false;
1375168a8f58SJerry Chu }
1376168a8f58SJerry Chu 
1377168a8f58SJerry Chu static int tcp_v4_conn_req_fastopen(struct sock *sk,
1378168a8f58SJerry Chu 				    struct sk_buff *skb,
1379168a8f58SJerry Chu 				    struct sk_buff *skb_synack,
1380168a8f58SJerry Chu 				    struct request_sock *req,
1381168a8f58SJerry Chu 				    struct request_values *rvp)
1382168a8f58SJerry Chu {
1383168a8f58SJerry Chu 	struct tcp_sock *tp = tcp_sk(sk);
1384168a8f58SJerry Chu 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
1385168a8f58SJerry Chu 	const struct inet_request_sock *ireq = inet_rsk(req);
1386168a8f58SJerry Chu 	struct sock *child;
1387016818d0SNeal Cardwell 	int err;
1388168a8f58SJerry Chu 
1389168a8f58SJerry Chu 	req->retrans = 0;
1390168a8f58SJerry Chu 	req->sk = NULL;
1391168a8f58SJerry Chu 
1392168a8f58SJerry Chu 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
1393168a8f58SJerry Chu 	if (child == NULL) {
1394168a8f58SJerry Chu 		NET_INC_STATS_BH(sock_net(sk),
1395168a8f58SJerry Chu 				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1396168a8f58SJerry Chu 		kfree_skb(skb_synack);
1397168a8f58SJerry Chu 		return -1;
1398168a8f58SJerry Chu 	}
1399016818d0SNeal Cardwell 	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1400168a8f58SJerry Chu 				    ireq->rmt_addr, ireq->opt);
1401016818d0SNeal Cardwell 	err = net_xmit_eval(err);
1402016818d0SNeal Cardwell 	if (!err)
1403016818d0SNeal Cardwell 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
1404168a8f58SJerry Chu 	/* XXX (TFO) - is it ok to ignore error and continue? */
1405168a8f58SJerry Chu 
1406168a8f58SJerry Chu 	spin_lock(&queue->fastopenq->lock);
1407168a8f58SJerry Chu 	queue->fastopenq->qlen++;
1408168a8f58SJerry Chu 	spin_unlock(&queue->fastopenq->lock);
1409168a8f58SJerry Chu 
1410168a8f58SJerry Chu 	/* Initialize the child socket. Have to fix some values to take
1411168a8f58SJerry Chu 	 * into account the child is a Fast Open socket and is created
1412168a8f58SJerry Chu 	 * only out of the bits carried in the SYN packet.
1413168a8f58SJerry Chu 	 */
1414168a8f58SJerry Chu 	tp = tcp_sk(child);
1415168a8f58SJerry Chu 
1416168a8f58SJerry Chu 	tp->fastopen_rsk = req;
1417168a8f58SJerry Chu 	/* Do a hold on the listner sk so that if the listener is being
1418168a8f58SJerry Chu 	 * closed, the child that has been accepted can live on and still
1419168a8f58SJerry Chu 	 * access listen_lock.
1420168a8f58SJerry Chu 	 */
1421168a8f58SJerry Chu 	sock_hold(sk);
1422168a8f58SJerry Chu 	tcp_rsk(req)->listener = sk;
1423168a8f58SJerry Chu 
1424168a8f58SJerry Chu 	/* RFC1323: The window in SYN & SYN/ACK segments is never
1425168a8f58SJerry Chu 	 * scaled. So correct it appropriately.
1426168a8f58SJerry Chu 	 */
1427168a8f58SJerry Chu 	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
1428168a8f58SJerry Chu 
1429168a8f58SJerry Chu 	/* Activate the retrans timer so that SYNACK can be retransmitted.
1430168a8f58SJerry Chu 	 * The request socket is not added to the SYN table of the parent
1431168a8f58SJerry Chu 	 * because it's been added to the accept queue directly.
1432168a8f58SJerry Chu 	 */
1433168a8f58SJerry Chu 	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
1434168a8f58SJerry Chu 	    TCP_TIMEOUT_INIT, TCP_RTO_MAX);
1435168a8f58SJerry Chu 
1436168a8f58SJerry Chu 	/* Add the child socket directly into the accept queue */
1437168a8f58SJerry Chu 	inet_csk_reqsk_queue_add(sk, req, child);
1438168a8f58SJerry Chu 
1439168a8f58SJerry Chu 	/* Now finish processing the fastopen child socket. */
1440168a8f58SJerry Chu 	inet_csk(child)->icsk_af_ops->rebuild_header(child);
1441168a8f58SJerry Chu 	tcp_init_congestion_control(child);
1442168a8f58SJerry Chu 	tcp_mtup_init(child);
1443168a8f58SJerry Chu 	tcp_init_buffer_space(child);
1444168a8f58SJerry Chu 	tcp_init_metrics(child);
1445168a8f58SJerry Chu 
1446168a8f58SJerry Chu 	/* Queue the data carried in the SYN packet. We need to first
1447168a8f58SJerry Chu 	 * bump skb's refcnt because the caller will attempt to free it.
1448168a8f58SJerry Chu 	 *
1449168a8f58SJerry Chu 	 * XXX (TFO) - we honor a zero-payload TFO request for now.
1450168a8f58SJerry Chu 	 * (Any reason not to?)
1451168a8f58SJerry Chu 	 */
1452168a8f58SJerry Chu 	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
1453168a8f58SJerry Chu 		/* Don't queue the skb if there is no payload in SYN.
1454168a8f58SJerry Chu 		 * XXX (TFO) - How about SYN+FIN?
1455168a8f58SJerry Chu 		 */
1456168a8f58SJerry Chu 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1457168a8f58SJerry Chu 	} else {
1458168a8f58SJerry Chu 		skb = skb_get(skb);
1459168a8f58SJerry Chu 		skb_dst_drop(skb);
1460168a8f58SJerry Chu 		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
1461168a8f58SJerry Chu 		skb_set_owner_r(skb, child);
1462168a8f58SJerry Chu 		__skb_queue_tail(&child->sk_receive_queue, skb);
1463168a8f58SJerry Chu 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1464168a8f58SJerry Chu 	}
1465168a8f58SJerry Chu 	sk->sk_data_ready(sk, 0);
1466168a8f58SJerry Chu 	bh_unlock_sock(child);
1467168a8f58SJerry Chu 	sock_put(child);
1468168a8f58SJerry Chu 	WARN_ON(req->sk == NULL);
1469168a8f58SJerry Chu 	return 0;
1470168a8f58SJerry Chu }
1471168a8f58SJerry Chu 
14721da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
14731da177e4SLinus Torvalds {
14744957faadSWilliam Allen Simpson 	struct tcp_extend_values tmp_ext;
14751da177e4SLinus Torvalds 	struct tcp_options_received tmp_opt;
1476cf533ea5SEric Dumazet 	const u8 *hash_location;
147760236fddSArnaldo Carvalho de Melo 	struct request_sock *req;
1478e6b4d113SWilliam Allen Simpson 	struct inet_request_sock *ireq;
14794957faadSWilliam Allen Simpson 	struct tcp_sock *tp = tcp_sk(sk);
1480e6b4d113SWilliam Allen Simpson 	struct dst_entry *dst = NULL;
1481eddc9ec5SArnaldo Carvalho de Melo 	__be32 saddr = ip_hdr(skb)->saddr;
1482eddc9ec5SArnaldo Carvalho de Melo 	__be32 daddr = ip_hdr(skb)->daddr;
14831da177e4SLinus Torvalds 	__u32 isn = TCP_SKB_CB(skb)->when;
1484a2a385d6SEric Dumazet 	bool want_cookie = false;
1485168a8f58SJerry Chu 	struct flowi4 fl4;
1486168a8f58SJerry Chu 	struct tcp_fastopen_cookie foc = { .len = -1 };
1487168a8f58SJerry Chu 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
1488168a8f58SJerry Chu 	struct sk_buff *skb_synack;
1489168a8f58SJerry Chu 	int do_fastopen;
14901da177e4SLinus Torvalds 
14911da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1492511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
14931da177e4SLinus Torvalds 		goto drop;
14941da177e4SLinus Torvalds 
14951da177e4SLinus Torvalds 	/* TW buckets are converted to open requests without
14961da177e4SLinus Torvalds 	 * limitations, they conserve resources and peer is
14971da177e4SLinus Torvalds 	 * evidently real one.
14981da177e4SLinus Torvalds 	 */
1499463c84b9SArnaldo Carvalho de Melo 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1500946cedccSEric Dumazet 		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1501946cedccSEric Dumazet 		if (!want_cookie)
15021da177e4SLinus Torvalds 			goto drop;
15031da177e4SLinus Torvalds 	}
15041da177e4SLinus Torvalds 
15051da177e4SLinus Torvalds 	/* Accept backlog is full. If we have already queued enough
15061da177e4SLinus Torvalds 	 * of warm entries in syn queue, drop request. It is better than
15071da177e4SLinus Torvalds 	 * clogging syn queue with openreqs with exponentially increasing
15081da177e4SLinus Torvalds 	 * timeout.
15091da177e4SLinus Torvalds 	 */
1510463c84b9SArnaldo Carvalho de Melo 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
15111da177e4SLinus Torvalds 		goto drop;
15121da177e4SLinus Torvalds 
1513ce4a7d0dSArnaldo Carvalho de Melo 	req = inet_reqsk_alloc(&tcp_request_sock_ops);
15141da177e4SLinus Torvalds 	if (!req)
15151da177e4SLinus Torvalds 		goto drop;
15161da177e4SLinus Torvalds 
1517cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1518cfb6eeb4SYOSHIFUJI Hideaki 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1519cfb6eeb4SYOSHIFUJI Hideaki #endif
1520cfb6eeb4SYOSHIFUJI Hideaki 
15211da177e4SLinus Torvalds 	tcp_clear_options(&tmp_opt);
1522bee7ca9eSWilliam Allen Simpson 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
15234957faadSWilliam Allen Simpson 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1524168a8f58SJerry Chu 	tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
1525168a8f58SJerry Chu 	    want_cookie ? NULL : &foc);
15261da177e4SLinus Torvalds 
15274957faadSWilliam Allen Simpson 	if (tmp_opt.cookie_plus > 0 &&
15284957faadSWilliam Allen Simpson 	    tmp_opt.saw_tstamp &&
15294957faadSWilliam Allen Simpson 	    !tp->rx_opt.cookie_out_never &&
15304957faadSWilliam Allen Simpson 	    (sysctl_tcp_cookie_size > 0 ||
15314957faadSWilliam Allen Simpson 	     (tp->cookie_values != NULL &&
15324957faadSWilliam Allen Simpson 	      tp->cookie_values->cookie_desired > 0))) {
15334957faadSWilliam Allen Simpson 		u8 *c;
15344957faadSWilliam Allen Simpson 		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
15354957faadSWilliam Allen Simpson 		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
15364957faadSWilliam Allen Simpson 
15374957faadSWilliam Allen Simpson 		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
15384957faadSWilliam Allen Simpson 			goto drop_and_release;
15394957faadSWilliam Allen Simpson 
15404957faadSWilliam Allen Simpson 		/* Secret recipe starts with IP addresses */
15410eae88f3SEric Dumazet 		*mess++ ^= (__force u32)daddr;
15420eae88f3SEric Dumazet 		*mess++ ^= (__force u32)saddr;
15434957faadSWilliam Allen Simpson 
15444957faadSWilliam Allen Simpson 		/* plus variable length Initiator Cookie */
15454957faadSWilliam Allen Simpson 		c = (u8 *)mess;
15464957faadSWilliam Allen Simpson 		while (l-- > 0)
15474957faadSWilliam Allen Simpson 			*c++ ^= *hash_location++;
15484957faadSWilliam Allen Simpson 
1549a2a385d6SEric Dumazet 		want_cookie = false;	/* not our kind of cookie */
15504957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 0; /* false */
15514957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
15524957faadSWilliam Allen Simpson 	} else if (!tp->rx_opt.cookie_in_always) {
15534957faadSWilliam Allen Simpson 		/* redundant indications, but ensure initialization. */
15544957faadSWilliam Allen Simpson 		tmp_ext.cookie_out_never = 1; /* true */
15554957faadSWilliam Allen Simpson 		tmp_ext.cookie_plus = 0;
15564957faadSWilliam Allen Simpson 	} else {
15574957faadSWilliam Allen Simpson 		goto drop_and_release;
15584957faadSWilliam Allen Simpson 	}
15594957faadSWilliam Allen Simpson 	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
15601da177e4SLinus Torvalds 
15614dfc2817SFlorian Westphal 	if (want_cookie && !tmp_opt.saw_tstamp)
15621da177e4SLinus Torvalds 		tcp_clear_options(&tmp_opt);
15631da177e4SLinus Torvalds 
15641da177e4SLinus Torvalds 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
15651da177e4SLinus Torvalds 	tcp_openreq_init(req, &tmp_opt, skb);
15661da177e4SLinus Torvalds 
1567bb5b7c11SDavid S. Miller 	ireq = inet_rsk(req);
1568bb5b7c11SDavid S. Miller 	ireq->loc_addr = daddr;
1569bb5b7c11SDavid S. Miller 	ireq->rmt_addr = saddr;
1570bb5b7c11SDavid S. Miller 	ireq->no_srccheck = inet_sk(sk)->transparent;
15715dff747bSChristoph Paasch 	ireq->opt = tcp_v4_save_options(skb);
1572bb5b7c11SDavid S. Miller 
1573284904aaSPaul Moore 	if (security_inet_conn_request(sk, skb, req))
1574bb5b7c11SDavid S. Miller 		goto drop_and_free;
1575284904aaSPaul Moore 
1576172d69e6SFlorian Westphal 	if (!want_cookie || tmp_opt.tstamp_ok)
1577bd14b1b2SEric Dumazet 		TCP_ECN_create_request(req, skb);
15781da177e4SLinus Torvalds 
15791da177e4SLinus Torvalds 	if (want_cookie) {
15801da177e4SLinus Torvalds 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1581172d69e6SFlorian Westphal 		req->cookie_ts = tmp_opt.tstamp_ok;
15821da177e4SLinus Torvalds 	} else if (!isn) {
15831da177e4SLinus Torvalds 		/* VJ's idea. We save last timestamp seen
15841da177e4SLinus Torvalds 		 * from the destination in peer table, when entering
15851da177e4SLinus Torvalds 		 * state TIME-WAIT, and check against it before
15861da177e4SLinus Torvalds 		 * accepting new connection request.
15871da177e4SLinus Torvalds 		 *
15881da177e4SLinus Torvalds 		 * If "isn" is not zero, this request hit alive
15891da177e4SLinus Torvalds 		 * timewait bucket, so that all the necessary checks
15901da177e4SLinus Torvalds 		 * are made in the function processing timewait state.
15911da177e4SLinus Torvalds 		 */
15921da177e4SLinus Torvalds 		if (tmp_opt.saw_tstamp &&
1593295ff7edSArnaldo Carvalho de Melo 		    tcp_death_row.sysctl_tw_recycle &&
1594ba3f7f04SDavid S. Miller 		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
159581166dd6SDavid S. Miller 		    fl4.daddr == saddr) {
159681166dd6SDavid S. Miller 			if (!tcp_peer_is_proven(req, dst, true)) {
1597de0744afSPavel Emelyanov 				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
15987cd04fa7SDenis V. Lunev 				goto drop_and_release;
15991da177e4SLinus Torvalds 			}
16001da177e4SLinus Torvalds 		}
16011da177e4SLinus Torvalds 		/* Kill the following clause, if you dislike this way. */
16021da177e4SLinus Torvalds 		else if (!sysctl_tcp_syncookies &&
1603463c84b9SArnaldo Carvalho de Melo 			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
16041da177e4SLinus Torvalds 			  (sysctl_max_syn_backlog >> 2)) &&
160581166dd6SDavid S. Miller 			 !tcp_peer_is_proven(req, dst, false)) {
16061da177e4SLinus Torvalds 			/* Without syncookies last quarter of
16071da177e4SLinus Torvalds 			 * backlog is filled with destinations,
16081da177e4SLinus Torvalds 			 * proven to be alive.
16091da177e4SLinus Torvalds 			 * It means that we continue to communicate
16101da177e4SLinus Torvalds 			 * to destinations, already remembered
16111da177e4SLinus Torvalds 			 * to the moment of synflood.
16121da177e4SLinus Torvalds 			 */
1613afd46503SJoe Perches 			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1614673d57e7SHarvey Harrison 				       &saddr, ntohs(tcp_hdr(skb)->source));
16157cd04fa7SDenis V. Lunev 			goto drop_and_release;
16161da177e4SLinus Torvalds 		}
16171da177e4SLinus Torvalds 
1618a94f723dSGerrit Renker 		isn = tcp_v4_init_sequence(skb);
16191da177e4SLinus Torvalds 	}
16202e6599cbSArnaldo Carvalho de Melo 	tcp_rsk(req)->snt_isn = isn;
16211da177e4SLinus Torvalds 
1622168a8f58SJerry Chu 	if (dst == NULL) {
1623168a8f58SJerry Chu 		dst = inet_csk_route_req(sk, &fl4, req);
1624168a8f58SJerry Chu 		if (dst == NULL)
1625168a8f58SJerry Chu 			goto drop_and_free;
1626168a8f58SJerry Chu 	}
1627168a8f58SJerry Chu 	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
1628168a8f58SJerry Chu 
1629168a8f58SJerry Chu 	/* We don't call tcp_v4_send_synack() directly because we need
1630168a8f58SJerry Chu 	 * to make sure a child socket can be created successfully before
1631168a8f58SJerry Chu 	 * sending back synack!
1632168a8f58SJerry Chu 	 *
1633168a8f58SJerry Chu 	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
1634168a8f58SJerry Chu 	 * (or better yet, call tcp_send_synack() in the child context
1635168a8f58SJerry Chu 	 * directly, but will have to fix bunch of other code first)
1636168a8f58SJerry Chu 	 * after syn_recv_sock() except one will need to first fix the
1637168a8f58SJerry Chu 	 * latter to remove its dependency on the current implementation
1638168a8f58SJerry Chu 	 * of tcp_v4_send_synack()->tcp_select_initial_window().
1639168a8f58SJerry Chu 	 */
1640168a8f58SJerry Chu 	skb_synack = tcp_make_synack(sk, dst, req,
1641fff32699SEric Dumazet 	    (struct request_values *)&tmp_ext,
1642168a8f58SJerry Chu 	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1643168a8f58SJerry Chu 
1644168a8f58SJerry Chu 	if (skb_synack) {
1645168a8f58SJerry Chu 		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
1646168a8f58SJerry Chu 		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
1647168a8f58SJerry Chu 	} else
16481da177e4SLinus Torvalds 		goto drop_and_free;
16491da177e4SLinus Torvalds 
1650168a8f58SJerry Chu 	if (likely(!do_fastopen)) {
1651168a8f58SJerry Chu 		int err;
1652168a8f58SJerry Chu 		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
1653168a8f58SJerry Chu 		     ireq->rmt_addr, ireq->opt);
1654168a8f58SJerry Chu 		err = net_xmit_eval(err);
1655168a8f58SJerry Chu 		if (err || want_cookie)
1656168a8f58SJerry Chu 			goto drop_and_free;
1657168a8f58SJerry Chu 
1658016818d0SNeal Cardwell 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
1659168a8f58SJerry Chu 		tcp_rsk(req)->listener = NULL;
1660168a8f58SJerry Chu 		/* Add the request_sock to the SYN table */
16613f421baaSArnaldo Carvalho de Melo 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1662168a8f58SJerry Chu 		if (fastopen_cookie_present(&foc) && foc.len != 0)
1663168a8f58SJerry Chu 			NET_INC_STATS_BH(sock_net(sk),
1664168a8f58SJerry Chu 			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1665168a8f58SJerry Chu 	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
1666168a8f58SJerry Chu 	    (struct request_values *)&tmp_ext))
1667168a8f58SJerry Chu 		goto drop_and_free;
1668168a8f58SJerry Chu 
16691da177e4SLinus Torvalds 	return 0;
16701da177e4SLinus Torvalds 
16717cd04fa7SDenis V. Lunev drop_and_release:
16727cd04fa7SDenis V. Lunev 	dst_release(dst);
16731da177e4SLinus Torvalds drop_and_free:
167460236fddSArnaldo Carvalho de Melo 	reqsk_free(req);
16751da177e4SLinus Torvalds drop:
16761da177e4SLinus Torvalds 	return 0;
16771da177e4SLinus Torvalds }
16784bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
16791da177e4SLinus Torvalds 
16801da177e4SLinus Torvalds 
16811da177e4SLinus Torvalds /*
16821da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
16831da177e4SLinus Torvalds  * now create the new socket.
16841da177e4SLinus Torvalds  */
16851da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
168660236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
16871da177e4SLinus Torvalds 				  struct dst_entry *dst)
16881da177e4SLinus Torvalds {
16892e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
16901da177e4SLinus Torvalds 	struct inet_sock *newinet;
16911da177e4SLinus Torvalds 	struct tcp_sock *newtp;
16921da177e4SLinus Torvalds 	struct sock *newsk;
1693cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1694cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1695cfb6eeb4SYOSHIFUJI Hideaki #endif
1696f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
16971da177e4SLinus Torvalds 
16981da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
16991da177e4SLinus Torvalds 		goto exit_overflow;
17001da177e4SLinus Torvalds 
17011da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
17021da177e4SLinus Torvalds 	if (!newsk)
1703093d2823SBalazs Scheidler 		goto exit_nonewsk;
17041da177e4SLinus Torvalds 
1705bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1706fae6ef87SNeal Cardwell 	inet_sk_rx_dst_set(newsk, skb);
17071da177e4SLinus Torvalds 
17081da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
17091da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
17102e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1711c720c7e8SEric Dumazet 	newinet->inet_daddr   = ireq->rmt_addr;
1712c720c7e8SEric Dumazet 	newinet->inet_rcv_saddr = ireq->loc_addr;
1713c720c7e8SEric Dumazet 	newinet->inet_saddr	      = ireq->loc_addr;
1714f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1715f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
17162e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1717463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1718eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
17194c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1720d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1721f6d8bd05SEric Dumazet 	if (inet_opt)
1722f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1723c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
17241da177e4SLinus Torvalds 
1725dfd25fffSEric Dumazet 	if (!dst) {
1726dfd25fffSEric Dumazet 		dst = inet_csk_route_child_sock(sk, newsk, req);
1727dfd25fffSEric Dumazet 		if (!dst)
17280e734419SDavid S. Miller 			goto put_and_exit;
1729dfd25fffSEric Dumazet 	} else {
1730dfd25fffSEric Dumazet 		/* syncookie case : see end of cookie_v4_check() */
1731dfd25fffSEric Dumazet 	}
17320e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
17330e734419SDavid S. Miller 
17345d424d5aSJohn Heffner 	tcp_mtup_init(newsk);
17351da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
17360dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
1737f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1738f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1739f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1740f5fff5dcSTom Quetchenbach 
17411da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
1742623df484SNeal Cardwell 	tcp_synack_rtt_meas(newsk, req);
17439ad7c049SJerry Chu 	newtp->total_retrans = req->retrans;
17441da177e4SLinus Torvalds 
1745cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1746cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1747a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1748a915da9bSEric Dumazet 				AF_INET);
1749c720c7e8SEric Dumazet 	if (key != NULL) {
1750cfb6eeb4SYOSHIFUJI Hideaki 		/*
1751cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1752cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1753cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1754cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1755cfb6eeb4SYOSHIFUJI Hideaki 		 */
1756a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1757a915da9bSEric Dumazet 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1758a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1759cfb6eeb4SYOSHIFUJI Hideaki 	}
1760cfb6eeb4SYOSHIFUJI Hideaki #endif
1761cfb6eeb4SYOSHIFUJI Hideaki 
17620e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
17630e734419SDavid S. Miller 		goto put_and_exit;
17649327f705SEric Dumazet 	__inet_hash_nolisten(newsk, NULL);
17651da177e4SLinus Torvalds 
17661da177e4SLinus Torvalds 	return newsk;
17671da177e4SLinus Torvalds 
17681da177e4SLinus Torvalds exit_overflow:
1769de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1770093d2823SBalazs Scheidler exit_nonewsk:
1771093d2823SBalazs Scheidler 	dst_release(dst);
17721da177e4SLinus Torvalds exit:
1773de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
17741da177e4SLinus Torvalds 	return NULL;
17750e734419SDavid S. Miller put_and_exit:
1776709e8697SEric Dumazet 	tcp_clear_xmit_timers(newsk);
1777d8a6e65fSEric Dumazet 	tcp_cleanup_congestion_control(newsk);
1778918eb399SEric Dumazet 	bh_unlock_sock(newsk);
17790e734419SDavid S. Miller 	sock_put(newsk);
17800e734419SDavid S. Miller 	goto exit;
17811da177e4SLinus Torvalds }
17824bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
17831da177e4SLinus Torvalds 
17841da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
17851da177e4SLinus Torvalds {
1786aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
1787eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
17881da177e4SLinus Torvalds 	struct sock *nsk;
178960236fddSArnaldo Carvalho de Melo 	struct request_sock **prev;
17901da177e4SLinus Torvalds 	/* Find possible connection requests. */
1791463c84b9SArnaldo Carvalho de Melo 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
17921da177e4SLinus Torvalds 						       iph->saddr, iph->daddr);
17931da177e4SLinus Torvalds 	if (req)
17948336886fSJerry Chu 		return tcp_check_req(sk, skb, req, prev, false);
17951da177e4SLinus Torvalds 
17963b1e0a65SYOSHIFUJI Hideaki 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1797c67499c0SPavel Emelyanov 			th->source, iph->daddr, th->dest, inet_iif(skb));
17981da177e4SLinus Torvalds 
17991da177e4SLinus Torvalds 	if (nsk) {
18001da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
18011da177e4SLinus Torvalds 			bh_lock_sock(nsk);
18021da177e4SLinus Torvalds 			return nsk;
18031da177e4SLinus Torvalds 		}
18049469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
18051da177e4SLinus Torvalds 		return NULL;
18061da177e4SLinus Torvalds 	}
18071da177e4SLinus Torvalds 
18081da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
1809af9b4738SFlorian Westphal 	if (!th->syn)
18101da177e4SLinus Torvalds 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
18111da177e4SLinus Torvalds #endif
18121da177e4SLinus Torvalds 	return sk;
18131da177e4SLinus Torvalds }
18141da177e4SLinus Torvalds 
1815b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
18161da177e4SLinus Torvalds {
1817eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1818eddc9ec5SArnaldo Carvalho de Melo 
181984fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1820eddc9ec5SArnaldo Carvalho de Melo 		if (!tcp_v4_check(skb->len, iph->saddr,
1821eddc9ec5SArnaldo Carvalho de Melo 				  iph->daddr, skb->csum)) {
18221da177e4SLinus Torvalds 			skb->ip_summed = CHECKSUM_UNNECESSARY;
1823fb286bb2SHerbert Xu 			return 0;
1824fb286bb2SHerbert Xu 		}
1825fb286bb2SHerbert Xu 	}
1826fb286bb2SHerbert Xu 
1827eddc9ec5SArnaldo Carvalho de Melo 	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1828fb286bb2SHerbert Xu 				       skb->len, IPPROTO_TCP, 0);
1829fb286bb2SHerbert Xu 
1830fb286bb2SHerbert Xu 	if (skb->len <= 76) {
1831fb286bb2SHerbert Xu 		return __skb_checksum_complete(skb);
18321da177e4SLinus Torvalds 	}
18331da177e4SLinus Torvalds 	return 0;
18341da177e4SLinus Torvalds }
18351da177e4SLinus Torvalds 
18361da177e4SLinus Torvalds 
18371da177e4SLinus Torvalds /* The socket must have its spinlock held when we get
18381da177e4SLinus Torvalds  * here.
18391da177e4SLinus Torvalds  *
18401da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
18411da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
18421da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
18431da177e4SLinus Torvalds  * held.
18441da177e4SLinus Torvalds  */
/*
 * Dispatch one segment to the state-specific receive routine:
 *  - TCP_ESTABLISHED: tcp_rcv_established(), after validating the cached
 *    input route in sk->sk_rx_dst;
 *  - TCP_LISTEN: tcp_v4_hnd_req() picks the socket; a socket other than
 *    @sk is driven through tcp_child_process();
 *  - everything else: tcp_rcv_state_process().
 *
 * A non-zero result from any of those routines makes this function send
 * a reset and free the skb.  Segments that are too short or fail the
 * checksum bump TCP_MIB_INERRS and are freed.
 *
 * Always returns 0.
 */
18451da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
18461da177e4SLinus Torvalds {
	/* Socket the reset (if any) is sent on behalf of. */
1847cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1848cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1849cfb6eeb4SYOSHIFUJI Hideaki 	/*
1850cfb6eeb4SYOSHIFUJI Hideaki 	 * We really want to reject the packet as early as possible
1851cfb6eeb4SYOSHIFUJI Hideaki 	 * if:
1852cfb6eeb4SYOSHIFUJI Hideaki 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
1853cfb6eeb4SYOSHIFUJI Hideaki 	 *  o There is an MD5 option and we're not expecting one
1854cfb6eeb4SYOSHIFUJI Hideaki 	 */
1855cfb6eeb4SYOSHIFUJI Hideaki 	if (tcp_v4_inbound_md5_hash(sk, skb))
1856cfb6eeb4SYOSHIFUJI Hideaki 		goto discard;
1857cfb6eeb4SYOSHIFUJI Hideaki #endif
1858cfb6eeb4SYOSHIFUJI Hideaki 
18591da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
186092101b3bSDavid S. Miller 		struct dst_entry *dst = sk->sk_rx_dst;
1861404e0a8bSEric Dumazet 
1862404e0a8bSEric Dumazet 		sock_rps_save_rxhash(sk, skb);
		/*
		 * Drop the cached input route if the packet came in on a
		 * different interface or the route's check() op reports it
		 * as no longer valid.
		 */
1863404e0a8bSEric Dumazet 		if (dst) {
1864505fbcf0SEric Dumazet 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1865505fbcf0SEric Dumazet 			    dst->ops->check(dst, 0) == NULL) {
186692101b3bSDavid S. Miller 				dst_release(dst);
186792101b3bSDavid S. Miller 				sk->sk_rx_dst = NULL;
186892101b3bSDavid S. Miller 			}
186992101b3bSDavid S. Miller 		}
1870aa8223c7SArnaldo Carvalho de Melo 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1871cfb6eeb4SYOSHIFUJI Hideaki 			rsk = sk;
18721da177e4SLinus Torvalds 			goto reset;
1873cfb6eeb4SYOSHIFUJI Hideaki 		}
18741da177e4SLinus Torvalds 		return 0;
18751da177e4SLinus Torvalds 	}
18761da177e4SLinus Torvalds 
	/* Slow path: the length and checksum are verified here. */
1877ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
18781da177e4SLinus Torvalds 		goto csum_err;
18791da177e4SLinus Torvalds 
18801da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
18811da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
18821da177e4SLinus Torvalds 		if (!nsk)
18831da177e4SLinus Torvalds 			goto discard;
18841da177e4SLinus Torvalds 
		/*
		 * A socket other than the listener was selected: let it
		 * process the segment.  When nsk == sk we fall through to
		 * tcp_rcv_state_process() on the listener below.
		 */
18851da177e4SLinus Torvalds 		if (nsk != sk) {
1886bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
1887cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1888cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
18891da177e4SLinus Torvalds 				goto reset;
1890cfb6eeb4SYOSHIFUJI Hideaki 			}
18911da177e4SLinus Torvalds 			return 0;
18921da177e4SLinus Torvalds 		}
1893ca55158cSEric Dumazet 	} else
1894bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1895ca55158cSEric Dumazet 
1896aa8223c7SArnaldo Carvalho de Melo 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1897cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
18981da177e4SLinus Torvalds 		goto reset;
1899cfb6eeb4SYOSHIFUJI Hideaki 	}
19001da177e4SLinus Torvalds 	return 0;
19011da177e4SLinus Torvalds 
	/* reset falls through into discard: the skb is freed after the RST. */
19021da177e4SLinus Torvalds reset:
1903cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
19041da177e4SLinus Torvalds discard:
19051da177e4SLinus Torvalds 	kfree_skb(skb);
19061da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
19071da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
19081da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
19091da177e4SLinus Torvalds 	 * but you have been warned.
19101da177e4SLinus Torvalds 	 */
19111da177e4SLinus Torvalds 	return 0;
19121da177e4SLinus Torvalds 
19131da177e4SLinus Torvalds csum_err:
191463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
19151da177e4SLinus Torvalds 	goto discard;
19161da177e4SLinus Torvalds }
19174bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
19181da177e4SLinus Torvalds 
1919160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb)
192041063e9dSDavid S. Miller {
192141063e9dSDavid S. Miller 	const struct iphdr *iph;
192241063e9dSDavid S. Miller 	const struct tcphdr *th;
192341063e9dSDavid S. Miller 	struct sock *sk;
192441063e9dSDavid S. Miller 
192541063e9dSDavid S. Miller 	if (skb->pkt_type != PACKET_HOST)
1926160eb5a6SDavid S. Miller 		return;
192741063e9dSDavid S. Miller 
192845f00f99SEric Dumazet 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1929160eb5a6SDavid S. Miller 		return;
193041063e9dSDavid S. Miller 
193141063e9dSDavid S. Miller 	iph = ip_hdr(skb);
193245f00f99SEric Dumazet 	th = tcp_hdr(skb);
193341063e9dSDavid S. Miller 
193441063e9dSDavid S. Miller 	if (th->doff < sizeof(struct tcphdr) / 4)
1935160eb5a6SDavid S. Miller 		return;
193641063e9dSDavid S. Miller 
193745f00f99SEric Dumazet 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
193841063e9dSDavid S. Miller 				       iph->saddr, th->source,
19397011d085SVijay Subramanian 				       iph->daddr, ntohs(th->dest),
19409cb429d6SEric Dumazet 				       skb->skb_iif);
194141063e9dSDavid S. Miller 	if (sk) {
194241063e9dSDavid S. Miller 		skb->sk = sk;
194341063e9dSDavid S. Miller 		skb->destructor = sock_edemux;
194441063e9dSDavid S. Miller 		if (sk->sk_state != TCP_TIME_WAIT) {
194541063e9dSDavid S. Miller 			struct dst_entry *dst = sk->sk_rx_dst;
1946505fbcf0SEric Dumazet 
194741063e9dSDavid S. Miller 			if (dst)
194841063e9dSDavid S. Miller 				dst = dst_check(dst, 0);
194992101b3bSDavid S. Miller 			if (dst &&
1950505fbcf0SEric Dumazet 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
195141063e9dSDavid S. Miller 				skb_dst_set_noref(skb, dst);
195241063e9dSDavid S. Miller 		}
195341063e9dSDavid S. Miller 	}
195441063e9dSDavid S. Miller }
195541063e9dSDavid S. Miller 
19561da177e4SLinus Torvalds /*
19571da177e4SLinus Torvalds  *	From tcp_input.c
19581da177e4SLinus Torvalds  */
19591da177e4SLinus Torvalds 
/*
 * Entry point for an incoming IPv4 TCP segment.
 *
 * Validates the TCP header, fills in TCP_SKB_CB(), looks up the owning
 * socket and then either processes the segment directly, leaves it on the
 * prequeue, or appends it to the socket backlog when a user context owns
 * the socket.  Segments with no matching socket are answered with a reset
 * unless they are malformed; sockets in TIME_WAIT are handled under
 * do_time_wait.
 *
 * Returns 0 when the segment was dropped or queued, otherwise the value
 * returned by tcp_v4_do_rcv().
 */
19601da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
19611da177e4SLinus Torvalds {
1962eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1963cf533ea5SEric Dumazet 	const struct tcphdr *th;
19641da177e4SLinus Torvalds 	struct sock *sk;
19651da177e4SLinus Torvalds 	int ret;
1966a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
19671da177e4SLinus Torvalds 
19681da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
19691da177e4SLinus Torvalds 		goto discard_it;
19701da177e4SLinus Torvalds 
19711da177e4SLinus Torvalds 	/* Count it even if it's bad */
197263231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
19731da177e4SLinus Torvalds 
	/* Make sure the fixed part of the TCP header is in the linear area. */
19741da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
19751da177e4SLinus Torvalds 		goto discard_it;
19761da177e4SLinus Torvalds 
1977aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
19781da177e4SLinus Torvalds 
	/* doff counts 32-bit words; then pull the header including options. */
19791da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
19801da177e4SLinus Torvalds 		goto bad_packet;
19811da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
19821da177e4SLinus Torvalds 		goto discard_it;
19831da177e4SLinus Torvalds 
19841da177e4SLinus Torvalds 	/* An explanation is required here, I think.
19851da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1986caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
19871da177e4SLinus Torvalds 	 * So, we defer the checks. */
198860476372SHerbert Xu 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
19891da177e4SLinus Torvalds 		goto bad_packet;
19901da177e4SLinus Torvalds 
	/*
	 * NOTE(review): th is fetched again here, presumably because the
	 * pulls above may have moved the header data -- confirm.
	 */
1991aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1992eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
	/* end_seq counts the payload bytes plus one each for SYN and FIN. */
19931da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
19941da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
19951da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
19961da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
19971da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->when	 = 0;
1998b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
19991da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
20001da177e4SLinus Torvalds 
20019a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
20021da177e4SLinus Torvalds 	if (!sk)
20031da177e4SLinus Torvalds 		goto no_tcp_socket;
20041da177e4SLinus Torvalds 
	/* Also re-entered from do_time_wait with sk set to a listener. */
2005bb134d5dSEric Dumazet process:
2006bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
2007bb134d5dSEric Dumazet 		goto do_time_wait;
2008bb134d5dSEric Dumazet 
	/* Drop segments whose TTL is below the socket's configured minimum. */
20096cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
20106cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
2011d218d111SStephen Hemminger 		goto discard_and_relse;
20126cce09f8SEric Dumazet 	}
2013d218d111SStephen Hemminger 
20141da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
20151da177e4SLinus Torvalds 		goto discard_and_relse;
2016b59c2701SPatrick McHardy 	nf_reset(skb);
20171da177e4SLinus Torvalds 
2018fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
20191da177e4SLinus Torvalds 		goto discard_and_relse;
20201da177e4SLinus Torvalds 
20211da177e4SLinus Torvalds 	skb->dev = NULL;
20221da177e4SLinus Torvalds 
	/*
	 * If no user context owns the socket, process the segment now
	 * (tcp_v4_do_rcv() is skipped when tcp_prequeue() accepts the skb).
	 * Otherwise append it to the backlog, bounded by
	 * sk_rcvbuf + sk_sndbuf.
	 */
2023c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
20241da177e4SLinus Torvalds 	ret = 0;
20251da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
20261a2449a8SChris Leech #ifdef CONFIG_NET_DMA
20271a2449a8SChris Leech 		struct tcp_sock *tp = tcp_sk(sk);
20281a2449a8SChris Leech 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
2029a2bd1140SDave Jiang 			tp->ucopy.dma_chan = net_dma_find_channel();
20301a2449a8SChris Leech 		if (tp->ucopy.dma_chan)
20311a2449a8SChris Leech 			ret = tcp_v4_do_rcv(sk, skb);
20321a2449a8SChris Leech 		else
20331a2449a8SChris Leech #endif
20341a2449a8SChris Leech 		{
20351da177e4SLinus Torvalds 			if (!tcp_prequeue(sk, skb))
20361da177e4SLinus Torvalds 				ret = tcp_v4_do_rcv(sk, skb);
20371a2449a8SChris Leech 		}
2038da882c1fSEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb,
2039da882c1fSEric Dumazet 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
20406b03a53aSZhu Yi 		bh_unlock_sock(sk);
20416cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
20426b03a53aSZhu Yi 		goto discard_and_relse;
20436b03a53aSZhu Yi 	}
20441da177e4SLinus Torvalds 	bh_unlock_sock(sk);
20451da177e4SLinus Torvalds 
20461da177e4SLinus Torvalds 	sock_put(sk);
20471da177e4SLinus Torvalds 
20481da177e4SLinus Torvalds 	return ret;
20491da177e4SLinus Torvalds 
	/*
	 * No socket matched (also reached for TCP_TW_RST): count malformed
	 * segments as input errors, answer everything else with a reset.
	 */
20501da177e4SLinus Torvalds no_tcp_socket:
20511da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
20521da177e4SLinus Torvalds 		goto discard_it;
20531da177e4SLinus Torvalds 
20541da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		/* bad_packet is also jumped to from the header checks above. */
20551da177e4SLinus Torvalds bad_packet:
205663231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
20571da177e4SLinus Torvalds 	} else {
2058cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
20591da177e4SLinus Torvalds 	}
20601da177e4SLinus Torvalds 
20611da177e4SLinus Torvalds discard_it:
20621da177e4SLinus Torvalds 	/* Discard frame. */
20631da177e4SLinus Torvalds 	kfree_skb(skb);
20641da177e4SLinus Torvalds 	return 0;
20651da177e4SLinus Torvalds 
	/* Drop the socket reference before discarding the frame. */
20661da177e4SLinus Torvalds discard_and_relse:
20671da177e4SLinus Torvalds 	sock_put(sk);
20681da177e4SLinus Torvalds 	goto discard_it;
20691da177e4SLinus Torvalds 
	/* sk is a timewait socket here; it is released via inet_twsk_put(). */
20701da177e4SLinus Torvalds do_time_wait:
20711da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
20729469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
20731da177e4SLinus Torvalds 		goto discard_it;
20741da177e4SLinus Torvalds 	}
20751da177e4SLinus Torvalds 
20761da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
207763231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
20789469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
20791da177e4SLinus Torvalds 		goto discard_it;
20801da177e4SLinus Torvalds 	}
20819469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
20821da177e4SLinus Torvalds 	case TCP_TW_SYN: {
2083c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
2084c67499c0SPavel Emelyanov 							&tcp_hashinfo,
2085eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
2086463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
		/*
		 * A listener exists for this SYN: retire the timewait
		 * socket and restart processing on the listener.
		 */
20871da177e4SLinus Torvalds 		if (sk2) {
20889469c7b4SYOSHIFUJI Hideaki 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
20899469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
20901da177e4SLinus Torvalds 			sk = sk2;
20911da177e4SLinus Torvalds 			goto process;
20921da177e4SLinus Torvalds 		}
20931da177e4SLinus Torvalds 		/* Fall through to ACK */
20941da177e4SLinus Torvalds 	}
20951da177e4SLinus Torvalds 	case TCP_TW_ACK:
20961da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
20971da177e4SLinus Torvalds 		break;
20981da177e4SLinus Torvalds 	case TCP_TW_RST:
20991da177e4SLinus Torvalds 		goto no_tcp_socket;
21001da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
21011da177e4SLinus Torvalds 	}
21021da177e4SLinus Torvalds 	goto discard_it;
21031da177e4SLinus Torvalds }
21041da177e4SLinus Torvalds 
2105ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
2106ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
2107ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
2108ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
2109ccb7c410SDavid S. Miller };
21101da177e4SLinus Torvalds 
211163d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
21125d299f3dSEric Dumazet {
21135d299f3dSEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
21145d299f3dSEric Dumazet 
21155d299f3dSEric Dumazet 	dst_hold(dst);
21165d299f3dSEric Dumazet 	sk->sk_rx_dst = dst;
21175d299f3dSEric Dumazet 	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
21185d299f3dSEric Dumazet }
211963d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set);
21205d299f3dSEric Dumazet 
21213b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
21221da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
21231da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
212432519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
21255d299f3dSEric Dumazet 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
21261da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
21271da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
21281da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
21291da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
21301da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
2131543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
2132543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
2133ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
21343fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
21353fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
21363fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
21373fdadf7dSDmitry Mishin #endif
21381da177e4SLinus Torvalds };
21394bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
21401da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 flavour of the TCP MD5 signature hooks; tcp_v4_init_sock() installs
 * this table as the socket's af_specific operations.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup    = tcp_v4_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
	.md5_parse     = tcp_v4_parse_md5_keys,
};
#endif
2148cfb6eeb4SYOSHIFUJI Hideaki 
21491da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
21501da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
21511da177e4SLinus Torvalds  */
21521da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
21531da177e4SLinus Torvalds {
21546687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
21551da177e4SLinus Torvalds 
2156900f65d3SNeal Cardwell 	tcp_init_sock(sk);
21571da177e4SLinus Torvalds 
21588292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
2159900f65d3SNeal Cardwell 
2160cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
2161ac807fa8SDavid S. Miller 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
2162cfb6eeb4SYOSHIFUJI Hideaki #endif
21631da177e4SLinus Torvalds 
21641da177e4SLinus Torvalds 	return 0;
21651da177e4SLinus Torvalds }
21661da177e4SLinus Torvalds 
21677d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
21681da177e4SLinus Torvalds {
21691da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
21701da177e4SLinus Torvalds 
21711da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
21721da177e4SLinus Torvalds 
21736687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
2174317a76f9SStephen Hemminger 
21751da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
2176fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
21771da177e4SLinus Torvalds 
21781da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
21791da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
21801da177e4SLinus Torvalds 
2181cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
2182cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
2183cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
2184a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
2185a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
2186cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
2187cfb6eeb4SYOSHIFUJI Hideaki 	}
2188cfb6eeb4SYOSHIFUJI Hideaki #endif
2189cfb6eeb4SYOSHIFUJI Hideaki 
21901a2449a8SChris Leech #ifdef CONFIG_NET_DMA
21911a2449a8SChris Leech 	/* Cleans up our sk_async_wait_queue */
21921a2449a8SChris Leech 	__skb_queue_purge(&sk->sk_async_wait_queue);
21931a2449a8SChris Leech #endif
21941a2449a8SChris Leech 
21951da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
21961da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
21971da177e4SLinus Torvalds 
21981da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
2199463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
2200ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
22011da177e4SLinus Torvalds 
2202435cf559SWilliam Allen Simpson 	/* TCP Cookie Transactions */
2203435cf559SWilliam Allen Simpson 	if (tp->cookie_values != NULL) {
2204435cf559SWilliam Allen Simpson 		kref_put(&tp->cookie_values->kref,
2205435cf559SWilliam Allen Simpson 			 tcp_cookie_values_release);
2206435cf559SWilliam Allen Simpson 		tp->cookie_values = NULL;
2207435cf559SWilliam Allen Simpson 	}
2208168a8f58SJerry Chu 	BUG_ON(tp->fastopen_rsk != NULL);
2209435cf559SWilliam Allen Simpson 
2210cf60af03SYuchung Cheng 	/* If socket is aborted during connect operation */
2211cf60af03SYuchung Cheng 	tcp_free_fastopen_req(tp);
2212cf60af03SYuchung Cheng 
2213180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
2214d1a4c0b3SGlauber Costa 	sock_release_memcg(sk);
22151da177e4SLinus Torvalds }
22161da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
22171da177e4SLinus Torvalds 
22181da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
22191da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
22201da177e4SLinus Torvalds 
22213ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
22221da177e4SLinus Torvalds {
22233ab5aee7SEric Dumazet 	return hlist_nulls_empty(head) ? NULL :
22248feaf0c0SArnaldo Carvalho de Melo 		list_entry(head->first, struct inet_timewait_sock, tw_node);
22251da177e4SLinus Torvalds }
22261da177e4SLinus Torvalds 
22278feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
22281da177e4SLinus Torvalds {
22293ab5aee7SEric Dumazet 	return !is_a_nulls(tw->tw_node.next) ?
22303ab5aee7SEric Dumazet 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
22311da177e4SLinus Torvalds }
22321da177e4SLinus Torvalds 
2233a8b690f9STom Herbert /*
2234a8b690f9STom Herbert  * Get the next listener socket following cur.  If cur is NULL, get the
2235a8b690f9STom Herbert  * first socket starting from bucket given in st->bucket; when st->bucket
2236a8b690f9STom Herbert  * is zero the very first socket in the hash table is returned.
2237a8b690f9STom Herbert  */
/*
 * The value returned (and passed back in as @cur) is a struct sock while
 * st->state is TCP_SEQ_STATE_LISTENING and a struct request_sock while it
 * is TCP_SEQ_STATE_OPENREQ; in the latter case st->syn_wait_sk is the
 * listener whose SYN table is being walked.
 *
 * Locking as visible in this function: a non-NULL entry is returned with
 * the current bucket's ilb->lock still held, and an open request
 * additionally with the listener's syn_wait_lock read-held.  When NULL is
 * returned, all buckets have been walked and no lock is held.
 * NOTE(review): the held locks are presumably dropped by the seq_file
 * stop handler -- confirm against the caller.
 */
22381da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
22391da177e4SLinus Torvalds {
2240463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
2241c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
22421da177e4SLinus Torvalds 	struct sock *sk = cur;
22435caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
22441da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2245a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
22461da177e4SLinus Torvalds 
	/* Fresh start: lock the bucket in st->bucket and scan from its head. */
22471da177e4SLinus Torvalds 	if (!sk) {
2248a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
22495caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2250c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
2251a8b690f9STom Herbert 		st->offset = 0;
22521da177e4SLinus Torvalds 		goto get_sk;
22531da177e4SLinus Torvalds 	}
22545caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
22551da177e4SLinus Torvalds 	++st->num;
2256a8b690f9STom Herbert 	++st->offset;
22571da177e4SLinus Torvalds 
22581da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* Continue walking the SYN table of st->syn_wait_sk. */
225960236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
22601da177e4SLinus Torvalds 
2261463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
22621da177e4SLinus Torvalds 		req = req->dl_next;
22631da177e4SLinus Torvalds 		while (1) {
22641da177e4SLinus Torvalds 			while (req) {
2265bdccc4caSDaniel Lezcano 				if (req->rsk_ops->family == st->family) {
22661da177e4SLinus Torvalds 					cur = req;
22671da177e4SLinus Torvalds 					goto out;
22681da177e4SLinus Torvalds 				}
22691da177e4SLinus Torvalds 				req = req->dl_next;
22701da177e4SLinus Torvalds 			}
227172a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
22721da177e4SLinus Torvalds 				break;
			/* Entered from start_req below with st->sbucket == 0. */
22731da177e4SLinus Torvalds get_req:
2274463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
22751da177e4SLinus Torvalds 		}
		/* SYN table exhausted: resume with the next listener. */
22761bde5ac4SEric Dumazet 		sk	  = sk_nulls_next(st->syn_wait_sk);
22771da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
2278463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
22791da177e4SLinus Torvalds 	} else {
		/*
		 * cur was a listener: visit its pending requests (if any)
		 * before moving on to the next listener.
		 */
2280463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2281463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2282463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
22831da177e4SLinus Torvalds 			goto start_req;
2284463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
22851bde5ac4SEric Dumazet 		sk = sk_nulls_next(sk);
22861da177e4SLinus Torvalds 	}
22871da177e4SLinus Torvalds get_sk:
2288c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
22898475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
22908475ef9fSPavel Emelyanov 			continue;
22918475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
22921da177e4SLinus Torvalds 			cur = sk;
22931da177e4SLinus Torvalds 			goto out;
22941da177e4SLinus Torvalds 		}
		/*
		 * The listener's own family does not match, but its pending
		 * requests are still checked individually against st->family.
		 */
2295463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
2296463c84b9SArnaldo Carvalho de Melo 		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2297463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
22981da177e4SLinus Torvalds start_req:
22991da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
23001da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
23011da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
23021da177e4SLinus Torvalds 			st->sbucket	= 0;
23031da177e4SLinus Torvalds 			goto get_req;
23041da177e4SLinus Torvalds 		}
2305463c84b9SArnaldo Carvalho de Melo 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
23061da177e4SLinus Torvalds 	}
	/* Bucket exhausted: unlock it and move on to the next one, if any. */
23075caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
2308a8b690f9STom Herbert 	st->offset = 0;
23090f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
23105caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
23115caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
2312c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
23131da177e4SLinus Torvalds 		goto get_sk;
23141da177e4SLinus Torvalds 	}
23151da177e4SLinus Torvalds 	cur = NULL;
23161da177e4SLinus Torvalds out:
23171da177e4SLinus Torvalds 	return cur;
23181da177e4SLinus Torvalds }
23191da177e4SLinus Torvalds 
23201da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
23211da177e4SLinus Torvalds {
2322a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2323a8b690f9STom Herbert 	void *rc;
2324a8b690f9STom Herbert 
2325a8b690f9STom Herbert 	st->bucket = 0;
2326a8b690f9STom Herbert 	st->offset = 0;
2327a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
23281da177e4SLinus Torvalds 
23291da177e4SLinus Torvalds 	while (rc && *pos) {
23301da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
23311da177e4SLinus Torvalds 		--*pos;
23321da177e4SLinus Torvalds 	}
23331da177e4SLinus Torvalds 	return rc;
23341da177e4SLinus Torvalds }
23351da177e4SLinus Torvalds 
2336a2a385d6SEric Dumazet static inline bool empty_bucket(struct tcp_iter_state *st)
23376eac5604SAndi Kleen {
23383ab5aee7SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
23393ab5aee7SEric Dumazet 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
23406eac5604SAndi Kleen }
23416eac5604SAndi Kleen 
2342a8b690f9STom Herbert /*
2343a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
2344a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
2345a8b690f9STom Herbert  */
23461da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
23471da177e4SLinus Torvalds {
23481da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2349a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
23501da177e4SLinus Torvalds 	void *rc = NULL;
23511da177e4SLinus Torvalds 
2352a8b690f9STom Herbert 	st->offset = 0;
2353a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
23541da177e4SLinus Torvalds 		struct sock *sk;
23553ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
23568feaf0c0SArnaldo Carvalho de Melo 		struct inet_timewait_sock *tw;
23579db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
23581da177e4SLinus Torvalds 
23596eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
23606eac5604SAndi Kleen 		if (empty_bucket(st))
23616eac5604SAndi Kleen 			continue;
23626eac5604SAndi Kleen 
23639db66bdcSEric Dumazet 		spin_lock_bh(lock);
23643ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2365f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
2366878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
23671da177e4SLinus Torvalds 				continue;
23681da177e4SLinus Torvalds 			}
23691da177e4SLinus Torvalds 			rc = sk;
23701da177e4SLinus Torvalds 			goto out;
23711da177e4SLinus Torvalds 		}
23721da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_TIME_WAIT;
23738feaf0c0SArnaldo Carvalho de Melo 		inet_twsk_for_each(tw, node,
2374dbca9b27SEric Dumazet 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
237528518fc1SPavel Emelyanov 			if (tw->tw_family != st->family ||
2376878628fbSYOSHIFUJI Hideaki 			    !net_eq(twsk_net(tw), net)) {
23771da177e4SLinus Torvalds 				continue;
23781da177e4SLinus Torvalds 			}
23791da177e4SLinus Torvalds 			rc = tw;
23801da177e4SLinus Torvalds 			goto out;
23811da177e4SLinus Torvalds 		}
23829db66bdcSEric Dumazet 		spin_unlock_bh(lock);
23831da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
23841da177e4SLinus Torvalds 	}
23851da177e4SLinus Torvalds out:
23861da177e4SLinus Torvalds 	return rc;
23871da177e4SLinus Torvalds }
23881da177e4SLinus Torvalds 
23891da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
23901da177e4SLinus Torvalds {
23911da177e4SLinus Torvalds 	struct sock *sk = cur;
23928feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw;
23933ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
23941da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2395a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
23961da177e4SLinus Torvalds 
23971da177e4SLinus Torvalds 	++st->num;
2398a8b690f9STom Herbert 	++st->offset;
23991da177e4SLinus Torvalds 
24001da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
24011da177e4SLinus Torvalds 		tw = cur;
24021da177e4SLinus Torvalds 		tw = tw_next(tw);
24031da177e4SLinus Torvalds get_tw:
2404878628fbSYOSHIFUJI Hideaki 		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
24051da177e4SLinus Torvalds 			tw = tw_next(tw);
24061da177e4SLinus Torvalds 		}
24071da177e4SLinus Torvalds 		if (tw) {
24081da177e4SLinus Torvalds 			cur = tw;
24091da177e4SLinus Torvalds 			goto out;
24101da177e4SLinus Torvalds 		}
24119db66bdcSEric Dumazet 		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
24121da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
24131da177e4SLinus Torvalds 
24146eac5604SAndi Kleen 		/* Look for next non empty bucket */
2415a8b690f9STom Herbert 		st->offset = 0;
2416f373b53bSEric Dumazet 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
24176eac5604SAndi Kleen 				empty_bucket(st))
24186eac5604SAndi Kleen 			;
2419f373b53bSEric Dumazet 		if (st->bucket > tcp_hashinfo.ehash_mask)
24206eac5604SAndi Kleen 			return NULL;
24216eac5604SAndi Kleen 
24229db66bdcSEric Dumazet 		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
24233ab5aee7SEric Dumazet 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
24241da177e4SLinus Torvalds 	} else
24253ab5aee7SEric Dumazet 		sk = sk_nulls_next(sk);
24261da177e4SLinus Torvalds 
24273ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
2428878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
24291da177e4SLinus Torvalds 			goto found;
24301da177e4SLinus Torvalds 	}
24311da177e4SLinus Torvalds 
24321da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_TIME_WAIT;
2433dbca9b27SEric Dumazet 	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
24341da177e4SLinus Torvalds 	goto get_tw;
24351da177e4SLinus Torvalds found:
24361da177e4SLinus Torvalds 	cur = sk;
24371da177e4SLinus Torvalds out:
24381da177e4SLinus Torvalds 	return cur;
24391da177e4SLinus Torvalds }
24401da177e4SLinus Torvalds 
24411da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
24421da177e4SLinus Torvalds {
2443a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2444a8b690f9STom Herbert 	void *rc;
2445a8b690f9STom Herbert 
2446a8b690f9STom Herbert 	st->bucket = 0;
2447a8b690f9STom Herbert 	rc = established_get_first(seq);
24481da177e4SLinus Torvalds 
24491da177e4SLinus Torvalds 	while (rc && pos) {
24501da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
24511da177e4SLinus Torvalds 		--pos;
24521da177e4SLinus Torvalds 	}
24531da177e4SLinus Torvalds 	return rc;
24541da177e4SLinus Torvalds }
24551da177e4SLinus Torvalds 
24561da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
24571da177e4SLinus Torvalds {
24581da177e4SLinus Torvalds 	void *rc;
24591da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
24601da177e4SLinus Torvalds 
24611da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
24621da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
24631da177e4SLinus Torvalds 
24641da177e4SLinus Torvalds 	if (!rc) {
24651da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
24661da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
24671da177e4SLinus Torvalds 	}
24681da177e4SLinus Torvalds 
24691da177e4SLinus Torvalds 	return rc;
24701da177e4SLinus Torvalds }
24711da177e4SLinus Torvalds 
2472a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2473a8b690f9STom Herbert {
2474a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2475a8b690f9STom Herbert 	int offset = st->offset;
2476a8b690f9STom Herbert 	int orig_num = st->num;
2477a8b690f9STom Herbert 	void *rc = NULL;
2478a8b690f9STom Herbert 
2479a8b690f9STom Herbert 	switch (st->state) {
2480a8b690f9STom Herbert 	case TCP_SEQ_STATE_OPENREQ:
2481a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2482a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2483a8b690f9STom Herbert 			break;
2484a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2485a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2486a8b690f9STom Herbert 		while (offset-- && rc)
2487a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2488a8b690f9STom Herbert 		if (rc)
2489a8b690f9STom Herbert 			break;
2490a8b690f9STom Herbert 		st->bucket = 0;
2491a8b690f9STom Herbert 		/* Fallthrough */
2492a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2493a8b690f9STom Herbert 	case TCP_SEQ_STATE_TIME_WAIT:
2494a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2495a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2496a8b690f9STom Herbert 			break;
2497a8b690f9STom Herbert 		rc = established_get_first(seq);
2498a8b690f9STom Herbert 		while (offset-- && rc)
2499a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2500a8b690f9STom Herbert 	}
2501a8b690f9STom Herbert 
2502a8b690f9STom Herbert 	st->num = orig_num;
2503a8b690f9STom Herbert 
2504a8b690f9STom Herbert 	return rc;
2505a8b690f9STom Herbert }
2506a8b690f9STom Herbert 
25071da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
25081da177e4SLinus Torvalds {
25091da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2510a8b690f9STom Herbert 	void *rc;
2511a8b690f9STom Herbert 
2512a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2513a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2514a8b690f9STom Herbert 		if (rc)
2515a8b690f9STom Herbert 			goto out;
2516a8b690f9STom Herbert 	}
2517a8b690f9STom Herbert 
25181da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
25191da177e4SLinus Torvalds 	st->num = 0;
2520a8b690f9STom Herbert 	st->bucket = 0;
2521a8b690f9STom Herbert 	st->offset = 0;
2522a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2523a8b690f9STom Herbert 
2524a8b690f9STom Herbert out:
2525a8b690f9STom Herbert 	st->last_pos = *pos;
2526a8b690f9STom Herbert 	return rc;
25271da177e4SLinus Torvalds }
25281da177e4SLinus Torvalds 
25291da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
25301da177e4SLinus Torvalds {
2531a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
25321da177e4SLinus Torvalds 	void *rc = NULL;
25331da177e4SLinus Torvalds 
25341da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
25351da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
25361da177e4SLinus Torvalds 		goto out;
25371da177e4SLinus Torvalds 	}
25381da177e4SLinus Torvalds 
25391da177e4SLinus Torvalds 	switch (st->state) {
25401da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
25411da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
25421da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
25431da177e4SLinus Torvalds 		if (!rc) {
25441da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2545a8b690f9STom Herbert 			st->bucket = 0;
2546a8b690f9STom Herbert 			st->offset = 0;
25471da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
25481da177e4SLinus Torvalds 		}
25491da177e4SLinus Torvalds 		break;
25501da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
25511da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
25521da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
25531da177e4SLinus Torvalds 		break;
25541da177e4SLinus Torvalds 	}
25551da177e4SLinus Torvalds out:
25561da177e4SLinus Torvalds 	++*pos;
2557a8b690f9STom Herbert 	st->last_pos = *pos;
25581da177e4SLinus Torvalds 	return rc;
25591da177e4SLinus Torvalds }
25601da177e4SLinus Torvalds 
25611da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
25621da177e4SLinus Torvalds {
25631da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
25641da177e4SLinus Torvalds 
25651da177e4SLinus Torvalds 	switch (st->state) {
25661da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
25671da177e4SLinus Torvalds 		if (v) {
2568463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2569463c84b9SArnaldo Carvalho de Melo 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
25701da177e4SLinus Torvalds 		}
25711da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
25721da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
25735caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
25741da177e4SLinus Torvalds 		break;
25751da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
25761da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
25771da177e4SLinus Torvalds 		if (v)
25789db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
25791da177e4SLinus Torvalds 		break;
25801da177e4SLinus Torvalds 	}
25811da177e4SLinus Torvalds }
25821da177e4SLinus Torvalds 
258373cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
25841da177e4SLinus Torvalds {
25851da177e4SLinus Torvalds 	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
25861da177e4SLinus Torvalds 	struct tcp_iter_state *s;
258752d6f3f1SDenis V. Lunev 	int err;
25881da177e4SLinus Torvalds 
258952d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
259052d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
259152d6f3f1SDenis V. Lunev 	if (err < 0)
259252d6f3f1SDenis V. Lunev 		return err;
2593f40c8174SDaniel Lezcano 
259452d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
25951da177e4SLinus Torvalds 	s->family		= afinfo->family;
2596a8b690f9STom Herbert 	s->last_pos 		= 0;
2597f40c8174SDaniel Lezcano 	return 0;
2598f40c8174SDaniel Lezcano }
259973cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2600f40c8174SDaniel Lezcano 
26016f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
26021da177e4SLinus Torvalds {
26031da177e4SLinus Torvalds 	int rc = 0;
26041da177e4SLinus Torvalds 	struct proc_dir_entry *p;
26051da177e4SLinus Torvalds 
26069427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
26079427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
26089427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
26099427c4b3SDenis V. Lunev 
261084841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
261173cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
261284841c3cSDenis V. Lunev 	if (!p)
26131da177e4SLinus Torvalds 		rc = -ENOMEM;
26141da177e4SLinus Torvalds 	return rc;
26151da177e4SLinus Torvalds }
26164bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
26171da177e4SLinus Torvalds 
26186f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
26191da177e4SLinus Torvalds {
26206f8b13bcSDaniel Lezcano 	proc_net_remove(net, afinfo->name);
26211da177e4SLinus Torvalds }
26224bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
26231da177e4SLinus Torvalds 
2624cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2625a7cb5a49SEric W. Biederman 			 struct seq_file *f, int i, kuid_t uid, int *len)
26261da177e4SLinus Torvalds {
26272e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
2628a399a805SEric Dumazet 	long delta = req->expires - jiffies;
26291da177e4SLinus Torvalds 
26305e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
263171338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
26321da177e4SLinus Torvalds 		i,
26332e6599cbSArnaldo Carvalho de Melo 		ireq->loc_addr,
2634c720c7e8SEric Dumazet 		ntohs(inet_sk(sk)->inet_sport),
26352e6599cbSArnaldo Carvalho de Melo 		ireq->rmt_addr,
26362e6599cbSArnaldo Carvalho de Melo 		ntohs(ireq->rmt_port),
26371da177e4SLinus Torvalds 		TCP_SYN_RECV,
26381da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
26391da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
2640a399a805SEric Dumazet 		jiffies_delta_to_clock_t(delta),
26411da177e4SLinus Torvalds 		req->retrans,
2642a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), uid),
26431da177e4SLinus Torvalds 		0,  /* non standard timer */
26441da177e4SLinus Torvalds 		0, /* open_requests have no inode */
26451da177e4SLinus Torvalds 		atomic_read(&sk->sk_refcnt),
26465e659e4cSPavel Emelyanov 		req,
26475e659e4cSPavel Emelyanov 		len);
26481da177e4SLinus Torvalds }
26491da177e4SLinus Torvalds 
26505e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
26511da177e4SLinus Torvalds {
26521da177e4SLinus Torvalds 	int timer_active;
26531da177e4SLinus Torvalds 	unsigned long timer_expires;
2654cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2655cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2656cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
2657168a8f58SJerry Chu 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2658c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2659c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2660c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2661c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
266249d09007SEric Dumazet 	int rx_queue;
26631da177e4SLinus Torvalds 
2664463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
26651da177e4SLinus Torvalds 		timer_active	= 1;
2666463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2667463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
26681da177e4SLinus Torvalds 		timer_active	= 4;
2669463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2670cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
26711da177e4SLinus Torvalds 		timer_active	= 2;
2672cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
26731da177e4SLinus Torvalds 	} else {
26741da177e4SLinus Torvalds 		timer_active	= 0;
26751da177e4SLinus Torvalds 		timer_expires = jiffies;
26761da177e4SLinus Torvalds 	}
26771da177e4SLinus Torvalds 
267849d09007SEric Dumazet 	if (sk->sk_state == TCP_LISTEN)
267949d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
268049d09007SEric Dumazet 	else
268149d09007SEric Dumazet 		/*
268249d09007SEric Dumazet 		 * because we dont lock socket, we might find a transient negative value
268349d09007SEric Dumazet 		 */
268449d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
268549d09007SEric Dumazet 
26865e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
268771338aa7SDan Rosenberg 			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2688cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
268947da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
269049d09007SEric Dumazet 		rx_queue,
26911da177e4SLinus Torvalds 		timer_active,
2692a399a805SEric Dumazet 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2693463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2694a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
26956687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2696cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2697cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
26987be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
26997be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2700463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
27011da177e4SLinus Torvalds 		tp->snd_cwnd,
2702168a8f58SJerry Chu 		sk->sk_state == TCP_LISTEN ?
2703168a8f58SJerry Chu 		    (fastopenq ? fastopenq->max_qlen : 0) :
2704168a8f58SJerry Chu 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
27055e659e4cSPavel Emelyanov 		len);
27061da177e4SLinus Torvalds }
27071da177e4SLinus Torvalds 
2708cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
27095e659e4cSPavel Emelyanov 			       struct seq_file *f, int i, int *len)
27101da177e4SLinus Torvalds {
271123f33c2dSAl Viro 	__be32 dest, src;
27121da177e4SLinus Torvalds 	__u16 destp, srcp;
2713a399a805SEric Dumazet 	long delta = tw->tw_ttd - jiffies;
27141da177e4SLinus Torvalds 
27151da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
27161da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
27171da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
27181da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
27191da177e4SLinus Torvalds 
27205e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
272171338aa7SDan Rosenberg 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
27221da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2723a399a805SEric Dumazet 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
27245e659e4cSPavel Emelyanov 		atomic_read(&tw->tw_refcnt), tw, len);
27251da177e4SLinus Torvalds }
27261da177e4SLinus Torvalds 
27271da177e4SLinus Torvalds #define TMPSZ 150
27281da177e4SLinus Torvalds 
27291da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
27301da177e4SLinus Torvalds {
27311da177e4SLinus Torvalds 	struct tcp_iter_state *st;
27325e659e4cSPavel Emelyanov 	int len;
27331da177e4SLinus Torvalds 
27341da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
27351da177e4SLinus Torvalds 		seq_printf(seq, "%-*s\n", TMPSZ - 1,
27361da177e4SLinus Torvalds 			   "  sl  local_address rem_address   st tx_queue "
27371da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
27381da177e4SLinus Torvalds 			   "inode");
27391da177e4SLinus Torvalds 		goto out;
27401da177e4SLinus Torvalds 	}
27411da177e4SLinus Torvalds 	st = seq->private;
27421da177e4SLinus Torvalds 
27431da177e4SLinus Torvalds 	switch (st->state) {
27441da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
27451da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
27465e659e4cSPavel Emelyanov 		get_tcp4_sock(v, seq, st->num, &len);
27471da177e4SLinus Torvalds 		break;
27481da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
27495e659e4cSPavel Emelyanov 		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
27501da177e4SLinus Torvalds 		break;
27511da177e4SLinus Torvalds 	case TCP_SEQ_STATE_TIME_WAIT:
27525e659e4cSPavel Emelyanov 		get_timewait4_sock(v, seq, st->num, &len);
27531da177e4SLinus Torvalds 		break;
27541da177e4SLinus Torvalds 	}
27555e659e4cSPavel Emelyanov 	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
27561da177e4SLinus Torvalds out:
27571da177e4SLinus Torvalds 	return 0;
27581da177e4SLinus Torvalds }
27591da177e4SLinus Torvalds 
276073cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
276173cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
276273cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
276373cb88ecSArjan van de Ven 	.read    = seq_read,
276473cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
276573cb88ecSArjan van de Ven 	.release = seq_release_net
276673cb88ecSArjan van de Ven };
276773cb88ecSArjan van de Ven 
27681da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
27691da177e4SLinus Torvalds 	.name		= "tcp",
27701da177e4SLinus Torvalds 	.family		= AF_INET,
277173cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
27729427c4b3SDenis V. Lunev 	.seq_ops	= {
27739427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
27749427c4b3SDenis V. Lunev 	},
27751da177e4SLinus Torvalds };
27761da177e4SLinus Torvalds 
27772c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2778757764f6SPavel Emelyanov {
2779757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2780757764f6SPavel Emelyanov }
2781757764f6SPavel Emelyanov 
27822c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2783757764f6SPavel Emelyanov {
2784757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2785757764f6SPavel Emelyanov }
2786757764f6SPavel Emelyanov 
2787757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2788757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2789757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2790757764f6SPavel Emelyanov };
2791757764f6SPavel Emelyanov 
27921da177e4SLinus Torvalds int __init tcp4_proc_init(void)
27931da177e4SLinus Torvalds {
2794757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
27951da177e4SLinus Torvalds }
27961da177e4SLinus Torvalds 
27971da177e4SLinus Torvalds void tcp4_proc_exit(void)
27981da177e4SLinus Torvalds {
2799757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
28001da177e4SLinus Torvalds }
28011da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
28021da177e4SLinus Torvalds 
2803bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2804bf296b12SHerbert Xu {
2805b71d1d42SEric Dumazet 	const struct iphdr *iph = skb_gro_network_header(skb);
2806861b6501SEric Dumazet 	__wsum wsum;
2807861b6501SEric Dumazet 	__sum16 sum;
2808bf296b12SHerbert Xu 
2809bf296b12SHerbert Xu 	switch (skb->ip_summed) {
2810bf296b12SHerbert Xu 	case CHECKSUM_COMPLETE:
281186911732SHerbert Xu 		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2812bf296b12SHerbert Xu 				  skb->csum)) {
2813bf296b12SHerbert Xu 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2814bf296b12SHerbert Xu 			break;
2815bf296b12SHerbert Xu 		}
2816861b6501SEric Dumazet flush:
2817bf296b12SHerbert Xu 		NAPI_GRO_CB(skb)->flush = 1;
2818bf296b12SHerbert Xu 		return NULL;
2819861b6501SEric Dumazet 
2820861b6501SEric Dumazet 	case CHECKSUM_NONE:
2821861b6501SEric Dumazet 		wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
2822861b6501SEric Dumazet 					  skb_gro_len(skb), IPPROTO_TCP, 0);
2823861b6501SEric Dumazet 		sum = csum_fold(skb_checksum(skb,
2824861b6501SEric Dumazet 					     skb_gro_offset(skb),
2825861b6501SEric Dumazet 					     skb_gro_len(skb),
2826861b6501SEric Dumazet 					     wsum));
2827861b6501SEric Dumazet 		if (sum)
2828861b6501SEric Dumazet 			goto flush;
2829861b6501SEric Dumazet 
2830861b6501SEric Dumazet 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2831861b6501SEric Dumazet 		break;
2832bf296b12SHerbert Xu 	}
2833bf296b12SHerbert Xu 
2834bf296b12SHerbert Xu 	return tcp_gro_receive(head, skb);
2835bf296b12SHerbert Xu }
2836bf296b12SHerbert Xu 
2837bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb)
2838bf296b12SHerbert Xu {
2839b71d1d42SEric Dumazet 	const struct iphdr *iph = ip_hdr(skb);
2840bf296b12SHerbert Xu 	struct tcphdr *th = tcp_hdr(skb);
2841bf296b12SHerbert Xu 
2842bf296b12SHerbert Xu 	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2843bf296b12SHerbert Xu 				  iph->saddr, iph->daddr, 0);
2844bf296b12SHerbert Xu 	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2845bf296b12SHerbert Xu 
2846bf296b12SHerbert Xu 	return tcp_gro_complete(skb);
2847bf296b12SHerbert Xu }
2848bf296b12SHerbert Xu 
28491da177e4SLinus Torvalds struct proto tcp_prot = {
28501da177e4SLinus Torvalds 	.name			= "TCP",
28511da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
28521da177e4SLinus Torvalds 	.close			= tcp_close,
28531da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
28541da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2855463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
28561da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
28571da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
28581da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
28591da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
28601da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
28611da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
28621da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
28637ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
28647ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
28651da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
286646d3ceabSEric Dumazet 	.release_cb		= tcp_release_cb,
2867563d34d0SEric Dumazet 	.mtu_reduced		= tcp_v4_mtu_reduced,
2868ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2869ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2870ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
28711da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
28721da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
28730a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
28741da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
28751da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
28761da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
28771da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
28781da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
28791da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
28803ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
28816d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
288260236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
288339d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
28847ba42910SChangli Gao 	.no_autobind		= true,
2885543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2886543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2887543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2888543d9cfeSArnaldo Carvalho de Melo #endif
2889c255a458SAndrew Morton #ifdef CONFIG_MEMCG_KMEM
2890d1a4c0b3SGlauber Costa 	.init_cgroup		= tcp_init_cgroup,
2891d1a4c0b3SGlauber Costa 	.destroy_cgroup		= tcp_destroy_cgroup,
2892d1a4c0b3SGlauber Costa 	.proto_cgroup		= tcp_proto_cgroup,
2893d1a4c0b3SGlauber Costa #endif
28941da177e4SLinus Torvalds };
28954bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
28961da177e4SLinus Torvalds 
2897046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net)
2898046ee902SDenis V. Lunev {
2899be9f4a44SEric Dumazet 	return 0;
2900046ee902SDenis V. Lunev }
2901046ee902SDenis V. Lunev 
2902046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2903046ee902SDenis V. Lunev {
2904b099ce26SEric W. Biederman }
2905b099ce26SEric W. Biederman 
2906b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2907b099ce26SEric W. Biederman {
2908b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2909046ee902SDenis V. Lunev }
2910046ee902SDenis V. Lunev 
2911046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2912046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2913046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2914b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2915046ee902SDenis V. Lunev };
2916046ee902SDenis V. Lunev 
29179b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
29181da177e4SLinus Torvalds {
29195caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
29206a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
29211da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
29221da177e4SLinus Torvalds }
2923