xref: /linux/net/ipv4/tcp_ipv4.c (revision 1f3b359f1004bd34b7b0bad70b93e3c7af92a37b)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
541da177e4SLinus Torvalds 
55eb4dea58SHerbert Xu #include <linux/bottom_half.h>
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
645a0e3ad6STejun Heo #include <linux/slab.h>
651da177e4SLinus Torvalds 
66457c4cbcSEric W. Biederman #include <net/net_namespace.h>
671da177e4SLinus Torvalds #include <net/icmp.h>
68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
691da177e4SLinus Torvalds #include <net/tcp.h>
7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
711da177e4SLinus Torvalds #include <net/ipv6.h>
721da177e4SLinus Torvalds #include <net/inet_common.h>
736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
741da177e4SLinus Torvalds #include <net/xfrm.h>
756e5714eaSDavid S. Miller #include <net/secure_seq.h>
76076bb0c8SEliezer Tamir #include <net/busy_poll.h>
771da177e4SLinus Torvalds 
781da177e4SLinus Torvalds #include <linux/inet.h>
791da177e4SLinus Torvalds #include <linux/ipv6.h>
801da177e4SLinus Torvalds #include <linux/stddef.h>
811da177e4SLinus Torvalds #include <linux/proc_fs.h>
821da177e4SLinus Torvalds #include <linux/seq_file.h>
836797318eSIvan Delalande #include <linux/inetdevice.h>
841da177e4SLinus Torvalds 
85cf80e0e4SHerbert Xu #include <crypto/hash.h>
86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
87cfb6eeb4SYOSHIFUJI Hideaki 
88cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
/* Forward declaration only; the definition is not in this part of the file.
 * It is declared solely when CONFIG_TCP_MD5SIG is set.
 */
89a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
90318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
91cfb6eeb4SYOSHIFUJI Hideaki #endif
92cfb6eeb4SYOSHIFUJI Hideaki 
/* The inet_hashinfo instance used for TCP; tcp_v4_err() hands its address to
 * __inet_lookup_established(). Exported for use outside this file.
 */
935caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
944bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
951da177e4SLinus Torvalds 
9684b114b9SEric Dumazet static u32 tcp_v4_init_seq(const struct sk_buff *skb)
971da177e4SLinus Torvalds {
9884b114b9SEric Dumazet 	return secure_tcp_seq(ip_hdr(skb)->daddr,
99eddc9ec5SArnaldo Carvalho de Melo 			      ip_hdr(skb)->saddr,
100aa8223c7SArnaldo Carvalho de Melo 			      tcp_hdr(skb)->dest,
10184b114b9SEric Dumazet 			      tcp_hdr(skb)->source);
10284b114b9SEric Dumazet }
10384b114b9SEric Dumazet 
1045d2ed052SEric Dumazet static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
10584b114b9SEric Dumazet {
1065d2ed052SEric Dumazet 	return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
1071da177e4SLinus Torvalds }
1081da177e4SLinus Torvalds 
1096d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1106d6ee43eSArnaldo Carvalho de Melo {
1116d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1126d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1136d6ee43eSArnaldo Carvalho de Melo 
1146d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1156d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1166d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1176d6ee43eSArnaldo Carvalho de Melo 
1186d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1196d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1206d6ee43eSArnaldo Carvalho de Melo 	   holder.
1216d6ee43eSArnaldo Carvalho de Melo 
1226d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1236d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1246d6ee43eSArnaldo Carvalho de Melo 	 */
1256d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
12656ab6b93SHaishuang Yan 	    (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
1279d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1286d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1296d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1306d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1326d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1336d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1346d6ee43eSArnaldo Carvalho de Melo 		return 1;
1356d6ee43eSArnaldo Carvalho de Melo 	}
1366d6ee43eSArnaldo Carvalho de Melo 
1376d6ee43eSArnaldo Carvalho de Melo 	return 0;
1386d6ee43eSArnaldo Carvalho de Melo }
1396d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1406d6ee43eSArnaldo Carvalho de Melo 
1411da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
/*
 * tcp_v4_connect - start an active open towards the IPv4 address in @uaddr.
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as a struct sockaddr_in
 * @addr_len: size of @uaddr in bytes
 *
 * Returns 0 once tcp_connect() has succeeded, or the value left in err by
 * tcp_fastopen_defer_connect() when that helper takes over.  Errors raised
 * here: -EINVAL (address too short, or zero address together with a
 * source-route option), -EAFNOSUPPORT (family is not AF_INET) and
 * -ENETUNREACH (route is multicast or broadcast); errors from
 * ip_route_connect(), inet_hash_connect(), ip_route_newports(),
 * tcp_fastopen_defer_connect() and tcp_connect() are passed through.
 *
 * Failures that occur after the socket has been switched to TCP_SYN_SENT go
 * through the "failure" label, which sets TCP_CLOSE, drops the route and
 * clears sk_route_caps and inet_dport.
 */
1421da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1431da177e4SLinus Torvalds {
1442d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1451da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1461da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
147dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
148bada8adcSAl Viro 	__be32 daddr, nexthop;
149da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1502d7192d6SDavid S. Miller 	struct rtable *rt;
1511da177e4SLinus Torvalds 	int err;
152f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1531946e672SHaishuang Yan 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1561da177e4SLinus Torvalds 		return -EINVAL;
1571da177e4SLinus Torvalds 
1581da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1591da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1601da177e4SLinus Torvalds 
1611da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
162f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
1631e1d04e6SHannes Frederic Sowa 					     lockdep_sock_is_held(sk));
	/* With a source-route option, route towards the option's faddr
	 * rather than the final destination; a zero destination is rejected.
	 */
164f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1651da177e4SLinus Torvalds 		if (!daddr)
1661da177e4SLinus Torvalds 			return -EINVAL;
167f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1681da177e4SLinus Torvalds 	}
1691da177e4SLinus Torvalds 
	/* Remember the ports used for this lookup; they are passed again to
	 * ip_route_newports() once the local port has been chosen.
	 */
170dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
171dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
172da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
173da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1741da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1751da177e4SLinus Torvalds 			      IPPROTO_TCP,
1760e0d44abSSteffen Klassert 			      orig_sport, orig_dport, sk);
	/* Still before tcp_set_state(): return directly instead of going
	 * through the cleanup at the failure label.
	 */
177b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
178b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
179b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
180f1d8cba6SEric Dumazet 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
181b23dd4feSDavid S. Miller 		return err;
182584bdf8cSWei Dong 	}
1831da177e4SLinus Torvalds 
1841da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1851da177e4SLinus Torvalds 		ip_rt_put(rt);
1861da177e4SLinus Torvalds 		return -ENETUNREACH;
1871da177e4SLinus Torvalds 	}
1881da177e4SLinus Torvalds 
	/* Without source routing, adopt the destination stored in the flow
	 * by the route lookup.
	 */
189f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
190da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1911da177e4SLinus Torvalds 
	/* An unset source address is filled in from the flow. */
192c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
193da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
194d1e559d0SEric Dumazet 	sk_rcv_saddr_set(sk, inet->inet_saddr);
1951da177e4SLinus Torvalds 
	/* Timestamp state is present but the destination differs from the
	 * one currently recorded on the socket.
	 */
196c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1971da177e4SLinus Torvalds 		/* Reset inherited state */
1981da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
1991da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
200ee995283SPavel Emelyanov 		if (likely(!tp->repair))
2011da177e4SLinus Torvalds 			tp->write_seq	   = 0;
2021da177e4SLinus Torvalds 	}
2031da177e4SLinus Torvalds 
204c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
205d1e559d0SEric Dumazet 	sk_daddr_set(sk, daddr);
2061da177e4SLinus Torvalds 
	/* Extension header length is the IP option length, or 0 without options. */
207d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
208f6d8bd05SEric Dumazet 	if (inet_opt)
209f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2101da177e4SLinus Torvalds 
211bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2121da177e4SLinus Torvalds 
2131da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2141da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2151da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2161da177e4SLinus Torvalds 	 * complete initialization after this.
2171da177e4SLinus Torvalds 	 */
2181da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
2191946e672SHaishuang Yan 	err = inet_hash_connect(tcp_death_row, sk);
2201da177e4SLinus Torvalds 	if (err)
2211da177e4SLinus Torvalds 		goto failure;
2221da177e4SLinus Torvalds 
223877d1f62STom Herbert 	sk_set_txhash(sk);
2249e7ceb06SSathya Perla 
	/* Revisit the route now that inet_sport/inet_dport hold the final
	 * ports.  On error rt is set to NULL before jumping to failure, where
	 * ip_rt_put(rt) is called.
	 */
225da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
226c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
227b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
228b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
229b23dd4feSDavid S. Miller 		rt = NULL;
2301da177e4SLinus Torvalds 		goto failure;
231b23dd4feSDavid S. Miller 	}
2321da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
233bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
234d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
	/* The route has been handed to sk_setup_caps(); clear rt so the
	 * ip_rt_put(rt) at the failure label no longer refers to it.
	 */
23519f6d3f3SWei Wang 	rt = NULL;
2361da177e4SLinus Torvalds 
	/* In repair mode write_seq and tsoffset are left as they are.
	 * Otherwise a write_seq that is still zero gets a generated value.
	 */
23700355fa5SAlexey Kodanev 	if (likely(!tp->repair)) {
23884b114b9SEric Dumazet 		if (!tp->write_seq)
23984b114b9SEric Dumazet 			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
240c720c7e8SEric Dumazet 						       inet->inet_daddr,
241c720c7e8SEric Dumazet 						       inet->inet_sport,
24284b114b9SEric Dumazet 						       usin->sin_port);
2435d2ed052SEric Dumazet 		tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
2445d2ed052SEric Dumazet 						 inet->inet_saddr,
24584b114b9SEric Dumazet 						 inet->inet_daddr);
24600355fa5SAlexey Kodanev 	}
2471da177e4SLinus Torvalds 
248c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2491da177e4SLinus Torvalds 
	/* A non-zero return from tcp_fastopen_defer_connect() ends the call
	 * here with whatever it stored in err, skipping tcp_connect(); a zero
	 * return with err set is treated as a failure.
	 */
25019f6d3f3SWei Wang 	if (tcp_fastopen_defer_connect(sk, &err))
25119f6d3f3SWei Wang 		return err;
25219f6d3f3SWei Wang 	if (err)
25319f6d3f3SWei Wang 		goto failure;
25419f6d3f3SWei Wang 
2551da177e4SLinus Torvalds 	err = tcp_connect(sk);
256ee995283SPavel Emelyanov 
2571da177e4SLinus Torvalds 	if (err)
2581da177e4SLinus Torvalds 		goto failure;
2591da177e4SLinus Torvalds 
2601da177e4SLinus Torvalds 	return 0;
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds failure:
2637174259eSArnaldo Carvalho de Melo 	/*
2647174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2657174259eSArnaldo Carvalho de Melo 	 * if necessary.
2667174259eSArnaldo Carvalho de Melo 	 */
2671da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
	/* NOTE(review): rt can be NULL here; ip_rt_put() presumably accepts that. */
2681da177e4SLinus Torvalds 	ip_rt_put(rt);
2691da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
270c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2711da177e4SLinus Torvalds 	return err;
2721da177e4SLinus Torvalds }
2734bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds /*
276563d34d0SEric Dumazet  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
277563d34d0SEric Dumazet  * It can be called through tcp_release_cb() if socket was owned by user
278563d34d0SEric Dumazet  * at the time tcp_v4_err() was called to handle ICMP message.
2791da177e4SLinus Torvalds  */
2804fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk)
2811da177e4SLinus Torvalds {
2821da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
28302b2faafSEric Dumazet 	struct dst_entry *dst;
28402b2faafSEric Dumazet 	u32 mtu;
2851da177e4SLinus Torvalds 
28602b2faafSEric Dumazet 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
28702b2faafSEric Dumazet 		return;
28802b2faafSEric Dumazet 	mtu = tcp_sk(sk)->mtu_info;
28980d0a69fSDavid S. Miller 	dst = inet_csk_update_pmtu(sk, mtu);
29080d0a69fSDavid S. Miller 	if (!dst)
2911da177e4SLinus Torvalds 		return;
2921da177e4SLinus Torvalds 
2931da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
2941da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
2951da177e4SLinus Torvalds 	 */
2961da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
2971da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
2981da177e4SLinus Torvalds 
2991da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
3001da177e4SLinus Torvalds 
3011da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
302482fc609SHannes Frederic Sowa 	    ip_sk_accept_pmtu(sk) &&
303d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
3041da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
3051da177e4SLinus Torvalds 
3061da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
3071da177e4SLinus Torvalds 		 * clear that the old packet has been
3081da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
3091da177e4SLinus Torvalds 		 * discovery.
3101da177e4SLinus Torvalds 		 */
3111da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3121da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3131da177e4SLinus Torvalds }
3144fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced);
3151da177e4SLinus Torvalds 
31655be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk)
31755be7a9cSDavid S. Miller {
31855be7a9cSDavid S. Miller 	struct dst_entry *dst = __sk_dst_check(sk, 0);
31955be7a9cSDavid S. Miller 
3201ed5c48fSDavid S. Miller 	if (dst)
3216700c270SDavid S. Miller 		dst->ops->redirect(dst, sk, skb);
32255be7a9cSDavid S. Miller }
32355be7a9cSDavid S. Miller 
32426e37360SEric Dumazet 
32526e37360SEric Dumazet /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
3269cf74903SEric Dumazet void tcp_req_err(struct sock *sk, u32 seq, bool abort)
32726e37360SEric Dumazet {
32826e37360SEric Dumazet 	struct request_sock *req = inet_reqsk(sk);
32926e37360SEric Dumazet 	struct net *net = sock_net(sk);
33026e37360SEric Dumazet 
33126e37360SEric Dumazet 	/* ICMPs are not backlogged, hence we cannot get
33226e37360SEric Dumazet 	 * an established socket here.
33326e37360SEric Dumazet 	 */
33426e37360SEric Dumazet 	if (seq != tcp_rsk(req)->snt_isn) {
33502a1d6e7SEric Dumazet 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
3369cf74903SEric Dumazet 	} else if (abort) {
33726e37360SEric Dumazet 		/*
33826e37360SEric Dumazet 		 * Still in SYN_RECV, just remove it silently.
33926e37360SEric Dumazet 		 * There is no good way to pass the error to the newly
34026e37360SEric Dumazet 		 * created socket, and POSIX does not want network
34126e37360SEric Dumazet 		 * errors returned from accept().
34226e37360SEric Dumazet 		 */
343c6973669SFan Du 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
3449caad864SEric Dumazet 		tcp_listendrop(req->rsk_listener);
34526e37360SEric Dumazet 	}
346ef84d8ceSEric Dumazet 	reqsk_put(req);
34726e37360SEric Dumazet }
34826e37360SEric Dumazet EXPORT_SYMBOL(tcp_req_err);
34926e37360SEric Dumazet 
3501da177e4SLinus Torvalds /*
3511da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3521da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3531da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3541da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3551da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3561da177e4SLinus Torvalds  * to find the appropriate port.
3571da177e4SLinus Torvalds  *
3581da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3591da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3601da177e4SLinus Torvalds  * and for some paths there is no check at all.
3611da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3621da177e4SLinus Torvalds  * is probably better.
3631da177e4SLinus Torvalds  *
3641da177e4SLinus Torvalds  */
3651da177e4SLinus Torvalds 
3664d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3671da177e4SLinus Torvalds {
368b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3694d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
370f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3711da177e4SLinus Torvalds 	struct tcp_sock *tp;
3721da177e4SLinus Torvalds 	struct inet_sock *inet;
3734d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3744d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3751da177e4SLinus Torvalds 	struct sock *sk;
376f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3770a672f74SYuchung Cheng 	struct request_sock *fastopen;
3789a568de4SEric Dumazet 	u32 seq, snd_una;
3799a568de4SEric Dumazet 	s32 remaining;
3809a568de4SEric Dumazet 	u32 delta_us;
3811da177e4SLinus Torvalds 	int err;
3824d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3831da177e4SLinus Torvalds 
38426e37360SEric Dumazet 	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
38526e37360SEric Dumazet 				       th->dest, iph->saddr, ntohs(th->source),
3863fa6f616SDavid Ahern 				       inet_iif(icmp_skb), 0);
3871da177e4SLinus Torvalds 	if (!sk) {
3885d3848bcSEric Dumazet 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
3891da177e4SLinus Torvalds 		return;
3901da177e4SLinus Torvalds 	}
3911da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3929469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3931da177e4SLinus Torvalds 		return;
3941da177e4SLinus Torvalds 	}
39526e37360SEric Dumazet 	seq = ntohl(th->seq);
39626e37360SEric Dumazet 	if (sk->sk_state == TCP_NEW_SYN_RECV)
3979cf74903SEric Dumazet 		return tcp_req_err(sk, seq,
3989cf74903SEric Dumazet 				  type == ICMP_PARAMETERPROB ||
3999cf74903SEric Dumazet 				  type == ICMP_TIME_EXCEEDED ||
4009cf74903SEric Dumazet 				  (type == ICMP_DEST_UNREACH &&
4019cf74903SEric Dumazet 				   (code == ICMP_NET_UNREACH ||
4029cf74903SEric Dumazet 				    code == ICMP_HOST_UNREACH)));
4031da177e4SLinus Torvalds 
4041da177e4SLinus Torvalds 	bh_lock_sock(sk);
4051da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
4061da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
407563d34d0SEric Dumazet 	 * We do take care of PMTU discovery (RFC1191) special case :
408563d34d0SEric Dumazet 	 * we can receive locally generated ICMP messages while socket is held.
4091da177e4SLinus Torvalds 	 */
410b74aa930SEric Dumazet 	if (sock_owned_by_user(sk)) {
411b74aa930SEric Dumazet 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
41202a1d6e7SEric Dumazet 			__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
413b74aa930SEric Dumazet 	}
4141da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
4151da177e4SLinus Torvalds 		goto out;
4161da177e4SLinus Torvalds 
41797e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
41802a1d6e7SEric Dumazet 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
41997e3ecd1Sstephen hemminger 		goto out;
42097e3ecd1Sstephen hemminger 	}
42197e3ecd1Sstephen hemminger 
422f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
4231da177e4SLinus Torvalds 	tp = tcp_sk(sk);
4240a672f74SYuchung Cheng 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
4250a672f74SYuchung Cheng 	fastopen = tp->fastopen_rsk;
4260a672f74SYuchung Cheng 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
4271da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
4280a672f74SYuchung Cheng 	    !between(seq, snd_una, tp->snd_nxt)) {
42902a1d6e7SEric Dumazet 		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
4301da177e4SLinus Torvalds 		goto out;
4311da177e4SLinus Torvalds 	}
4321da177e4SLinus Torvalds 
4331da177e4SLinus Torvalds 	switch (type) {
43455be7a9cSDavid S. Miller 	case ICMP_REDIRECT:
43545caeaa5SJon Maxwell 		if (!sock_owned_by_user(sk))
43655be7a9cSDavid S. Miller 			do_redirect(icmp_skb, sk);
43755be7a9cSDavid S. Miller 		goto out;
4381da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
4391da177e4SLinus Torvalds 		/* Just silently ignore these. */
4401da177e4SLinus Torvalds 		goto out;
4411da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4421da177e4SLinus Torvalds 		err = EPROTO;
4431da177e4SLinus Torvalds 		break;
4441da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4451da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4461da177e4SLinus Torvalds 			goto out;
4471da177e4SLinus Torvalds 
4481da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4490d4f0608SEric Dumazet 			/* We are not interested in TCP_LISTEN and open_requests
4500d4f0608SEric Dumazet 			 * (SYN-ACKs send out by Linux are always <576bytes so
4510d4f0608SEric Dumazet 			 * they should go through unfragmented).
4520d4f0608SEric Dumazet 			 */
4530d4f0608SEric Dumazet 			if (sk->sk_state == TCP_LISTEN)
4540d4f0608SEric Dumazet 				goto out;
4550d4f0608SEric Dumazet 
456563d34d0SEric Dumazet 			tp->mtu_info = info;
457144d56e9SEric Dumazet 			if (!sock_owned_by_user(sk)) {
458563d34d0SEric Dumazet 				tcp_v4_mtu_reduced(sk);
459144d56e9SEric Dumazet 			} else {
4607aa5470cSEric Dumazet 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
461144d56e9SEric Dumazet 					sock_hold(sk);
462144d56e9SEric Dumazet 			}
4631da177e4SLinus Torvalds 			goto out;
4641da177e4SLinus Torvalds 		}
4651da177e4SLinus Torvalds 
4661da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
467f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
468f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
469f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
470f1ecd5d9SDamian Lukowski 			break;
471f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
4720a672f74SYuchung Cheng 		    !icsk->icsk_backoff || fastopen)
473f1ecd5d9SDamian Lukowski 			break;
474f1ecd5d9SDamian Lukowski 
4758f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4768f49c270SDavid S. Miller 			break;
4778f49c270SDavid S. Miller 
478f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
479fcdd1cf4SEric Dumazet 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
480fcdd1cf4SEric Dumazet 					       TCP_TIMEOUT_INIT;
481fcdd1cf4SEric Dumazet 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
482f1ecd5d9SDamian Lukowski 
483f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
484f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
485f1ecd5d9SDamian Lukowski 
4869a568de4SEric Dumazet 		tcp_mstamp_refresh(tp);
4879a568de4SEric Dumazet 		delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
4887faee5c0SEric Dumazet 		remaining = icsk->icsk_rto -
4899a568de4SEric Dumazet 			    usecs_to_jiffies(delta_us);
490f1ecd5d9SDamian Lukowski 
4919a568de4SEric Dumazet 		if (remaining > 0) {
492f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
493f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
494f1ecd5d9SDamian Lukowski 		} else {
495f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
496f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
497f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
498f1ecd5d9SDamian Lukowski 		}
499f1ecd5d9SDamian Lukowski 
5001da177e4SLinus Torvalds 		break;
5011da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
5021da177e4SLinus Torvalds 		err = EHOSTUNREACH;
5031da177e4SLinus Torvalds 		break;
5041da177e4SLinus Torvalds 	default:
5051da177e4SLinus Torvalds 		goto out;
5061da177e4SLinus Torvalds 	}
5071da177e4SLinus Torvalds 
5081da177e4SLinus Torvalds 	switch (sk->sk_state) {
5091da177e4SLinus Torvalds 	case TCP_SYN_SENT:
5100a672f74SYuchung Cheng 	case TCP_SYN_RECV:
5110a672f74SYuchung Cheng 		/* Only in fast or simultaneous open. If a fast open socket is
5120a672f74SYuchung Cheng 		 * is already accepted it is treated as a connected one below.
5131da177e4SLinus Torvalds 		 */
51451456b29SIan Morris 		if (fastopen && !fastopen->sk)
5150a672f74SYuchung Cheng 			break;
5160a672f74SYuchung Cheng 
5171da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
5181da177e4SLinus Torvalds 			sk->sk_err = err;
5191da177e4SLinus Torvalds 
5201da177e4SLinus Torvalds 			sk->sk_error_report(sk);
5211da177e4SLinus Torvalds 
5221da177e4SLinus Torvalds 			tcp_done(sk);
5231da177e4SLinus Torvalds 		} else {
5241da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5251da177e4SLinus Torvalds 		}
5261da177e4SLinus Torvalds 		goto out;
5271da177e4SLinus Torvalds 	}
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5301da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5311da177e4SLinus Torvalds 	 *
5321da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5331da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5341da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5351da177e4SLinus Torvalds 	 *
5361da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5371da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5381da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5391da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5401da177e4SLinus Torvalds 	 *
5411da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5421da177e4SLinus Torvalds 	 *							--ANK (980905)
5431da177e4SLinus Torvalds 	 */
5441da177e4SLinus Torvalds 
5451da177e4SLinus Torvalds 	inet = inet_sk(sk);
5461da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5471da177e4SLinus Torvalds 		sk->sk_err = err;
5481da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5491da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5501da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5511da177e4SLinus Torvalds 	}
5521da177e4SLinus Torvalds 
5531da177e4SLinus Torvalds out:
5541da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5551da177e4SLinus Torvalds 	sock_put(sk);
5561da177e4SLinus Torvalds }
5571da177e4SLinus Torvalds 
55828850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
5591da177e4SLinus Torvalds {
560aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5611da177e4SLinus Torvalds 
56284fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
563419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
564663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
565ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5661da177e4SLinus Torvalds 	} else {
567419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
56807f0757aSJoe Perches 					 csum_partial(th,
5691da177e4SLinus Torvalds 						      th->doff << 2,
5701da177e4SLinus Torvalds 						      skb->csum));
5711da177e4SLinus Torvalds 	}
5721da177e4SLinus Torvalds }
5731da177e4SLinus Torvalds 
574419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
575bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
576419f9f89SHerbert Xu {
577cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
578419f9f89SHerbert Xu 
579419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
580419f9f89SHerbert Xu }
5814bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
582419f9f89SHerbert Xu 
5831da177e4SLinus Torvalds /*
5841da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5851da177e4SLinus Torvalds  *
5861da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5871da177e4SLinus Torvalds  *		      for reset.
5881da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5891da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5901da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5911da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5921da177e4SLinus Torvalds  *		arrived with segment.
5931da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5941da177e4SLinus Torvalds  */
5951da177e4SLinus Torvalds 
596a00e7444SEric Dumazet static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
5971da177e4SLinus Torvalds {
598cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
599cfb6eeb4SYOSHIFUJI Hideaki 	struct {
600cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
601cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
602714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
603cfb6eeb4SYOSHIFUJI Hideaki #endif
604cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
6051da177e4SLinus Torvalds 	struct ip_reply_arg arg;
606cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
607e46787f0SFlorian Westphal 	struct tcp_md5sig_key *key = NULL;
608658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
609658ddaafSShawn Lu 	unsigned char newhash[16];
610658ddaafSShawn Lu 	int genhash;
611658ddaafSShawn Lu 	struct sock *sk1 = NULL;
612cfb6eeb4SYOSHIFUJI Hideaki #endif
613a86b1e30SPavel Emelyanov 	struct net *net;
6141da177e4SLinus Torvalds 
6151da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
6161da177e4SLinus Torvalds 	if (th->rst)
6171da177e4SLinus Torvalds 		return;
6181da177e4SLinus Torvalds 
619c3658e8dSEric Dumazet 	/* If sk not NULL, it means we did a successful lookup and incoming
620c3658e8dSEric Dumazet 	 * route had to be correct. prequeue might have dropped our dst.
621c3658e8dSEric Dumazet 	 */
622c3658e8dSEric Dumazet 	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
6231da177e4SLinus Torvalds 		return;
6241da177e4SLinus Torvalds 
6251da177e4SLinus Torvalds 	/* Swap the send and the receive. */
626cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
627cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
628cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
629cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
630cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6311da177e4SLinus Torvalds 
6321da177e4SLinus Torvalds 	if (th->ack) {
633cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6341da177e4SLinus Torvalds 	} else {
635cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
636cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6371da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6381da177e4SLinus Torvalds 	}
6391da177e4SLinus Torvalds 
6407174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
641cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
642cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
643cfb6eeb4SYOSHIFUJI Hideaki 
6440f85feaeSEric Dumazet 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
645cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
6463b24d854SEric Dumazet 	rcu_read_lock();
647658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
648271c3b9bSFlorian Westphal 	if (sk && sk_fullsock(sk)) {
649e46787f0SFlorian Westphal 		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
650e46787f0SFlorian Westphal 					&ip_hdr(skb)->saddr, AF_INET);
651e46787f0SFlorian Westphal 	} else if (hash_location) {
652658ddaafSShawn Lu 		/*
653658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
654658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
655658ddaafSShawn Lu 		 * we are not loose security here:
656658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
657658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
658658ddaafSShawn Lu 		 */
659a583636aSCraig Gallek 		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
660a583636aSCraig Gallek 					     ip_hdr(skb)->saddr,
661da5e3630STom Herbert 					     th->source, ip_hdr(skb)->daddr,
6623fa6f616SDavid Ahern 					     ntohs(th->source), inet_iif(skb),
6633fa6f616SDavid Ahern 					     tcp_v4_sdif(skb));
664658ddaafSShawn Lu 		/* don't send rst if it can't find key */
665658ddaafSShawn Lu 		if (!sk1)
6663b24d854SEric Dumazet 			goto out;
6673b24d854SEric Dumazet 
668658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
669658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
670658ddaafSShawn Lu 		if (!key)
6713b24d854SEric Dumazet 			goto out;
6723b24d854SEric Dumazet 
673658ddaafSShawn Lu 
67439f8e58eSEric Dumazet 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
675658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
6763b24d854SEric Dumazet 			goto out;
6773b24d854SEric Dumazet 
678658ddaafSShawn Lu 	}
679658ddaafSShawn Lu 
680cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
681cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
682cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
683cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
684cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
685cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
686cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
687cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
688cfb6eeb4SYOSHIFUJI Hideaki 
68949a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
69078e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
69178e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
692cfb6eeb4SYOSHIFUJI Hideaki 	}
693cfb6eeb4SYOSHIFUJI Hideaki #endif
694eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
695eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
69652cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
6971da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
698271c3b9bSFlorian Westphal 	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
699271c3b9bSFlorian Westphal 
700e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
7014c675258SAlexey Kuznetsov 	 * routing might fail in this case. No choice here, if we choose to force
7024c675258SAlexey Kuznetsov 	 * input interface, we will misroute in case of asymmetric route.
703e2446eaaSShawn Lu 	 */
7044c675258SAlexey Kuznetsov 	if (sk)
7054c675258SAlexey Kuznetsov 		arg.bound_dev_if = sk->sk_bound_dev_if;
7061da177e4SLinus Torvalds 
707271c3b9bSFlorian Westphal 	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
708271c3b9bSFlorian Westphal 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
709271c3b9bSFlorian Westphal 
71066b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
711e2d118a1SLorenzo Colitti 	arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
71247dcc20aSEric Dumazet 	local_bh_disable();
713bdbbb852SEric Dumazet 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
714bdbbb852SEric Dumazet 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
71524a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
71624a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
7171da177e4SLinus Torvalds 
71890bbcc60SEric Dumazet 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
71990bbcc60SEric Dumazet 	__TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
72047dcc20aSEric Dumazet 	local_bh_enable();
721658ddaafSShawn Lu 
722658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
7233b24d854SEric Dumazet out:
724658ddaafSShawn Lu 	rcu_read_unlock();
725658ddaafSShawn Lu #endif
7261da177e4SLinus Torvalds }
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7291da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7301da177e4SLinus Torvalds  */
7311da177e4SLinus Torvalds 
732e2d118a1SLorenzo Colitti static void tcp_v4_send_ack(const struct sock *sk,
733e62a123bSEric Dumazet 			    struct sk_buff *skb, u32 seq, u32 ack,
734ee684b6fSAndrey Vagin 			    u32 win, u32 tsval, u32 tsecr, int oif,
73588ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
73666b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7371da177e4SLinus Torvalds {
738cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7391da177e4SLinus Torvalds 	struct {
7401da177e4SLinus Torvalds 		struct tcphdr th;
741714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
742cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
743cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
744cfb6eeb4SYOSHIFUJI Hideaki #endif
745cfb6eeb4SYOSHIFUJI Hideaki 			];
7461da177e4SLinus Torvalds 	} rep;
747e2d118a1SLorenzo Colitti 	struct net *net = sock_net(sk);
7481da177e4SLinus Torvalds 	struct ip_reply_arg arg;
7491da177e4SLinus Torvalds 
7501da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7517174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7521da177e4SLinus Torvalds 
7531da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7541da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
755ee684b6fSAndrey Vagin 	if (tsecr) {
756cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7571da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7581da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
759ee684b6fSAndrey Vagin 		rep.opt[1] = htonl(tsval);
760ee684b6fSAndrey Vagin 		rep.opt[2] = htonl(tsecr);
761cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7621da177e4SLinus Torvalds 	}
7631da177e4SLinus Torvalds 
7641da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7651da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7661da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7671da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7681da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7691da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7701da177e4SLinus Torvalds 	rep.th.ack     = 1;
7711da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7721da177e4SLinus Torvalds 
773cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
774cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
775ee684b6fSAndrey Vagin 		int offset = (tsecr) ? 3 : 0;
776cfb6eeb4SYOSHIFUJI Hideaki 
777cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
778cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
779cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
780cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
781cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
782cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
783cfb6eeb4SYOSHIFUJI Hideaki 
78449a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
78590b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
78690b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
787cfb6eeb4SYOSHIFUJI Hideaki 	}
788cfb6eeb4SYOSHIFUJI Hideaki #endif
78988ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
790eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
791eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7921da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7931da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7949501f972SYOSHIFUJI Hideaki 	if (oif)
7959501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
79666b13d99SEric Dumazet 	arg.tos = tos;
797e2d118a1SLorenzo Colitti 	arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
79847dcc20aSEric Dumazet 	local_bh_disable();
799bdbbb852SEric Dumazet 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
800bdbbb852SEric Dumazet 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
80124a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
80224a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
8031da177e4SLinus Torvalds 
80490bbcc60SEric Dumazet 	__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
80547dcc20aSEric Dumazet 	local_bh_enable();
8061da177e4SLinus Torvalds }
8071da177e4SLinus Torvalds 
8081da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
8091da177e4SLinus Torvalds {
8108feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
811cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
8121da177e4SLinus Torvalds 
813e2d118a1SLorenzo Colitti 	tcp_v4_send_ack(sk, skb,
814e62a123bSEric Dumazet 			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
8157174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
8169a568de4SEric Dumazet 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
8179501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
8189501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
81988ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
82066b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
82166b13d99SEric Dumazet 			tw->tw_tos
8229501f972SYOSHIFUJI Hideaki 			);
8231da177e4SLinus Torvalds 
8248feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
8251da177e4SLinus Torvalds }
8261da177e4SLinus Torvalds 
827a00e7444SEric Dumazet static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
8287174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
8291da177e4SLinus Torvalds {
830168a8f58SJerry Chu 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
831168a8f58SJerry Chu 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
832168a8f58SJerry Chu 	 */
833e62a123bSEric Dumazet 	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
834e62a123bSEric Dumazet 					     tcp_sk(sk)->snd_nxt;
835e62a123bSEric Dumazet 
83620a2b49fSEric Dumazet 	/* RFC 7323 2.3
83720a2b49fSEric Dumazet 	 * The window field (SEG.WND) of every outgoing segment, with the
83820a2b49fSEric Dumazet 	 * exception of <SYN> segments, MUST be right-shifted by
83920a2b49fSEric Dumazet 	 * Rcv.Wind.Shift bits:
84020a2b49fSEric Dumazet 	 */
841e2d118a1SLorenzo Colitti 	tcp_v4_send_ack(sk, skb, seq,
84220a2b49fSEric Dumazet 			tcp_rsk(req)->rcv_nxt,
84320a2b49fSEric Dumazet 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
8449a568de4SEric Dumazet 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
8459501f972SYOSHIFUJI Hideaki 			req->ts_recent,
8469501f972SYOSHIFUJI Hideaki 			0,
847a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
848a915da9bSEric Dumazet 					  AF_INET),
84966b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
85066b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8511da177e4SLinus Torvalds }
8521da177e4SLinus Torvalds 
8531da177e4SLinus Torvalds /*
8549bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
85560236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8561da177e4SLinus Torvalds  *	socket.
8571da177e4SLinus Torvalds  */
8580f935dbeSEric Dumazet static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
859d6274bd8SOctavian Purdila 			      struct flowi *fl,
860e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
861ca6fb065SEric Dumazet 			      struct tcp_fastopen_cookie *foc,
862b3d05147SEric Dumazet 			      enum tcp_synack_type synack_type)
8631da177e4SLinus Torvalds {
8642e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8656bd023f3SDavid S. Miller 	struct flowi4 fl4;
8661da177e4SLinus Torvalds 	int err = -1;
8671da177e4SLinus Torvalds 	struct sk_buff *skb;
8681da177e4SLinus Torvalds 
8691da177e4SLinus Torvalds 	/* First, grab a route. */
870ba3f7f04SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
871fd80eb94SDenis V. Lunev 		return -1;
8721da177e4SLinus Torvalds 
873b3d05147SEric Dumazet 	skb = tcp_make_synack(sk, dst, req, foc, synack_type);
8741da177e4SLinus Torvalds 
8751da177e4SLinus Torvalds 	if (skb) {
876634fb979SEric Dumazet 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
8771da177e4SLinus Torvalds 
878634fb979SEric Dumazet 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
879634fb979SEric Dumazet 					    ireq->ir_rmt_addr,
8802e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
881b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
8821da177e4SLinus Torvalds 	}
8831da177e4SLinus Torvalds 
8841da177e4SLinus Torvalds 	return err;
8851da177e4SLinus Torvalds }
8861da177e4SLinus Torvalds 
8871da177e4SLinus Torvalds /*
88860236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8891da177e4SLinus Torvalds  */
89060236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8911da177e4SLinus Torvalds {
8922e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8931da177e4SLinus Torvalds }
8941da177e4SLinus Torvalds 
895cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
896cfb6eeb4SYOSHIFUJI Hideaki /*
897cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
898cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
899cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
900cfb6eeb4SYOSHIFUJI Hideaki  */
901cfb6eeb4SYOSHIFUJI Hideaki 
902cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
903b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
904a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
905a915da9bSEric Dumazet 					 int family)
906cfb6eeb4SYOSHIFUJI Hideaki {
907fd3a154aSEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
908a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
909fd3a154aSEric Dumazet 	const struct tcp_md5sig_info *md5sig;
9106797318eSIvan Delalande 	__be32 mask;
9116797318eSIvan Delalande 	struct tcp_md5sig_key *best_match = NULL;
9126797318eSIvan Delalande 	bool match;
913cfb6eeb4SYOSHIFUJI Hideaki 
914a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
915a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
9161e1d04e6SHannes Frederic Sowa 				       lockdep_sock_is_held(sk));
917a8afca03SEric Dumazet 	if (!md5sig)
918cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
919083a0326SArnd Bergmann 
920b67bfe0dSSasha Levin 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
921a915da9bSEric Dumazet 		if (key->family != family)
922a915da9bSEric Dumazet 			continue;
9236797318eSIvan Delalande 
9246797318eSIvan Delalande 		if (family == AF_INET) {
9256797318eSIvan Delalande 			mask = inet_make_mask(key->prefixlen);
9266797318eSIvan Delalande 			match = (key->addr.a4.s_addr & mask) ==
9276797318eSIvan Delalande 				(addr->a4.s_addr & mask);
9286797318eSIvan Delalande #if IS_ENABLED(CONFIG_IPV6)
9296797318eSIvan Delalande 		} else if (family == AF_INET6) {
9306797318eSIvan Delalande 			match = ipv6_prefix_equal(&key->addr.a6, &addr->a6,
9316797318eSIvan Delalande 						  key->prefixlen);
9326797318eSIvan Delalande #endif
9336797318eSIvan Delalande 		} else {
9346797318eSIvan Delalande 			match = false;
9356797318eSIvan Delalande 		}
9366797318eSIvan Delalande 
9376797318eSIvan Delalande 		if (match && (!best_match ||
9386797318eSIvan Delalande 			      key->prefixlen > best_match->prefixlen))
9396797318eSIvan Delalande 			best_match = key;
9406797318eSIvan Delalande 	}
9416797318eSIvan Delalande 	return best_match;
9426797318eSIvan Delalande }
9436797318eSIvan Delalande EXPORT_SYMBOL(tcp_md5_do_lookup);
9446797318eSIvan Delalande 
945e8f37d57SWu Fengguang static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
9466797318eSIvan Delalande 						      const union tcp_md5_addr *addr,
9476797318eSIvan Delalande 						      int family, u8 prefixlen)
9486797318eSIvan Delalande {
9496797318eSIvan Delalande 	const struct tcp_sock *tp = tcp_sk(sk);
9506797318eSIvan Delalande 	struct tcp_md5sig_key *key;
9516797318eSIvan Delalande 	unsigned int size = sizeof(struct in_addr);
9526797318eSIvan Delalande 	const struct tcp_md5sig_info *md5sig;
9536797318eSIvan Delalande 
9546797318eSIvan Delalande 	/* caller either holds rcu_read_lock() or socket lock */
9556797318eSIvan Delalande 	md5sig = rcu_dereference_check(tp->md5sig_info,
9566797318eSIvan Delalande 				       lockdep_sock_is_held(sk));
9576797318eSIvan Delalande 	if (!md5sig)
9586797318eSIvan Delalande 		return NULL;
9596797318eSIvan Delalande #if IS_ENABLED(CONFIG_IPV6)
9606797318eSIvan Delalande 	if (family == AF_INET6)
9616797318eSIvan Delalande 		size = sizeof(struct in6_addr);
9626797318eSIvan Delalande #endif
9636797318eSIvan Delalande 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
9646797318eSIvan Delalande 		if (key->family != family)
9656797318eSIvan Delalande 			continue;
9666797318eSIvan Delalande 		if (!memcmp(&key->addr, addr, size) &&
9676797318eSIvan Delalande 		    key->prefixlen == prefixlen)
968a915da9bSEric Dumazet 			return key;
969cfb6eeb4SYOSHIFUJI Hideaki 	}
970cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
971cfb6eeb4SYOSHIFUJI Hideaki }
972cfb6eeb4SYOSHIFUJI Hideaki 
973b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
974fd3a154aSEric Dumazet 					 const struct sock *addr_sk)
975cfb6eeb4SYOSHIFUJI Hideaki {
976b52e6921SEric Dumazet 	const union tcp_md5_addr *addr;
977a915da9bSEric Dumazet 
978b52e6921SEric Dumazet 	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
979a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
980cfb6eeb4SYOSHIFUJI Hideaki }
981cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
982cfb6eeb4SYOSHIFUJI Hideaki 
983cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
984a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
9856797318eSIvan Delalande 		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
9866797318eSIvan Delalande 		   gfp_t gfp)
987cfb6eeb4SYOSHIFUJI Hideaki {
988cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
989b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
990cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
991f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
992f6685938SArnaldo Carvalho de Melo 
9936797318eSIvan Delalande 	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
994a915da9bSEric Dumazet 	if (key) {
995a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
996a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
997a915da9bSEric Dumazet 		key->keylen = newkeylen;
998a915da9bSEric Dumazet 		return 0;
999cfb6eeb4SYOSHIFUJI Hideaki 	}
1000260fcbebSYan, Zheng 
1001a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
10021e1d04e6SHannes Frederic Sowa 					   lockdep_sock_is_held(sk));
1003a915da9bSEric Dumazet 	if (!md5sig) {
1004a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
1005a915da9bSEric Dumazet 		if (!md5sig)
1006a915da9bSEric Dumazet 			return -ENOMEM;
1007a915da9bSEric Dumazet 
1008a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1009a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
1010a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
1011a915da9bSEric Dumazet 	}
1012a915da9bSEric Dumazet 
10135f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
1014a915da9bSEric Dumazet 	if (!key)
1015a915da9bSEric Dumazet 		return -ENOMEM;
101671cea17eSEric Dumazet 	if (!tcp_alloc_md5sig_pool()) {
10175f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
1018cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
1019cfb6eeb4SYOSHIFUJI Hideaki 	}
1020f6685938SArnaldo Carvalho de Melo 
1021a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
1022a915da9bSEric Dumazet 	key->keylen = newkeylen;
1023a915da9bSEric Dumazet 	key->family = family;
10246797318eSIvan Delalande 	key->prefixlen = prefixlen;
1025a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
1026a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
1027a915da9bSEric Dumazet 				      sizeof(struct in_addr));
1028a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
1029cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
1030cfb6eeb4SYOSHIFUJI Hideaki }
1031a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
1032cfb6eeb4SYOSHIFUJI Hideaki 
10336797318eSIvan Delalande int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
10346797318eSIvan Delalande 		   u8 prefixlen)
1035cfb6eeb4SYOSHIFUJI Hideaki {
1036a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1037cfb6eeb4SYOSHIFUJI Hideaki 
10386797318eSIvan Delalande 	key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
1039a915da9bSEric Dumazet 	if (!key)
1040cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
1041a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
10425f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1043a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
1044a915da9bSEric Dumazet 	return 0;
1045cfb6eeb4SYOSHIFUJI Hideaki }
1046a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
1047cfb6eeb4SYOSHIFUJI Hideaki 
1048e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk)
1049cfb6eeb4SYOSHIFUJI Hideaki {
1050cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
1051a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
1052b67bfe0dSSasha Levin 	struct hlist_node *n;
1053a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
1054cfb6eeb4SYOSHIFUJI Hideaki 
1055a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1056a8afca03SEric Dumazet 
1057b67bfe0dSSasha Levin 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
1058a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
10595f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1060a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
1061cfb6eeb4SYOSHIFUJI Hideaki 	}
1062cfb6eeb4SYOSHIFUJI Hideaki }
1063cfb6eeb4SYOSHIFUJI Hideaki 
10648917a777SIvan Delalande static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
10658917a777SIvan Delalande 				 char __user *optval, int optlen)
1066cfb6eeb4SYOSHIFUJI Hideaki {
1067cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
1068cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
10698917a777SIvan Delalande 	u8 prefixlen = 32;
1070cfb6eeb4SYOSHIFUJI Hideaki 
1071cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
1072cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1073cfb6eeb4SYOSHIFUJI Hideaki 
1074cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1075cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1076cfb6eeb4SYOSHIFUJI Hideaki 
1077cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1078cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1079cfb6eeb4SYOSHIFUJI Hideaki 
10808917a777SIvan Delalande 	if (optname == TCP_MD5SIG_EXT &&
10818917a777SIvan Delalande 	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
10828917a777SIvan Delalande 		prefixlen = cmd.tcpm_prefixlen;
10838917a777SIvan Delalande 		if (prefixlen > 32)
10848917a777SIvan Delalande 			return -EINVAL;
10858917a777SIvan Delalande 	}
10868917a777SIvan Delalande 
108764a124edSDmitry Popov 	if (!cmd.tcpm_keylen)
1088a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
10898917a777SIvan Delalande 				      AF_INET, prefixlen);
1090cfb6eeb4SYOSHIFUJI Hideaki 
1091cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1092cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1093cfb6eeb4SYOSHIFUJI Hideaki 
1094a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
10958917a777SIvan Delalande 			      AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
1096a915da9bSEric Dumazet 			      GFP_KERNEL);
1097cfb6eeb4SYOSHIFUJI Hideaki }
1098cfb6eeb4SYOSHIFUJI Hideaki 
109919689e38SEric Dumazet static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
110019689e38SEric Dumazet 				   __be32 daddr, __be32 saddr,
110119689e38SEric Dumazet 				   const struct tcphdr *th, int nbytes)
1102cfb6eeb4SYOSHIFUJI Hideaki {
1103cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
110449a72dfbSAdam Langley 	struct scatterlist sg;
110519689e38SEric Dumazet 	struct tcphdr *_th;
1106cfb6eeb4SYOSHIFUJI Hideaki 
110719689e38SEric Dumazet 	bp = hp->scratch;
1108cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1109cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1110cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1111076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
111249a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1113c7da57a1SDavid S. Miller 
111419689e38SEric Dumazet 	_th = (struct tcphdr *)(bp + 1);
111519689e38SEric Dumazet 	memcpy(_th, th, sizeof(*th));
111619689e38SEric Dumazet 	_th->check = 0;
111719689e38SEric Dumazet 
111819689e38SEric Dumazet 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
111919689e38SEric Dumazet 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
112019689e38SEric Dumazet 				sizeof(*bp) + sizeof(*th));
1121cf80e0e4SHerbert Xu 	return crypto_ahash_update(hp->md5_req);
112249a72dfbSAdam Langley }
112349a72dfbSAdam Langley 
1124a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1125318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
112649a72dfbSAdam Langley {
112749a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
1128cf80e0e4SHerbert Xu 	struct ahash_request *req;
112949a72dfbSAdam Langley 
113049a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
113149a72dfbSAdam Langley 	if (!hp)
113249a72dfbSAdam Langley 		goto clear_hash_noput;
1133cf80e0e4SHerbert Xu 	req = hp->md5_req;
113449a72dfbSAdam Langley 
1135cf80e0e4SHerbert Xu 	if (crypto_ahash_init(req))
113649a72dfbSAdam Langley 		goto clear_hash;
113719689e38SEric Dumazet 	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
113849a72dfbSAdam Langley 		goto clear_hash;
113949a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
114049a72dfbSAdam Langley 		goto clear_hash;
1141cf80e0e4SHerbert Xu 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
1142cf80e0e4SHerbert Xu 	if (crypto_ahash_final(req))
1143cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1144cfb6eeb4SYOSHIFUJI Hideaki 
1145cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1146cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
114749a72dfbSAdam Langley 
1148cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1149cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1150cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1151cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
115249a72dfbSAdam Langley 	return 1;
1153cfb6eeb4SYOSHIFUJI Hideaki }
1154cfb6eeb4SYOSHIFUJI Hideaki 
115539f8e58eSEric Dumazet int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
115639f8e58eSEric Dumazet 			const struct sock *sk,
1157318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1158cfb6eeb4SYOSHIFUJI Hideaki {
115949a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
1160cf80e0e4SHerbert Xu 	struct ahash_request *req;
1161318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1162cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1163cfb6eeb4SYOSHIFUJI Hideaki 
116439f8e58eSEric Dumazet 	if (sk) { /* valid for establish/request sockets */
116539f8e58eSEric Dumazet 		saddr = sk->sk_rcv_saddr;
116639f8e58eSEric Dumazet 		daddr = sk->sk_daddr;
1167cfb6eeb4SYOSHIFUJI Hideaki 	} else {
116849a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
116949a72dfbSAdam Langley 		saddr = iph->saddr;
117049a72dfbSAdam Langley 		daddr = iph->daddr;
1171cfb6eeb4SYOSHIFUJI Hideaki 	}
1172cfb6eeb4SYOSHIFUJI Hideaki 
117349a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
117449a72dfbSAdam Langley 	if (!hp)
117549a72dfbSAdam Langley 		goto clear_hash_noput;
1176cf80e0e4SHerbert Xu 	req = hp->md5_req;
117749a72dfbSAdam Langley 
1178cf80e0e4SHerbert Xu 	if (crypto_ahash_init(req))
117949a72dfbSAdam Langley 		goto clear_hash;
118049a72dfbSAdam Langley 
118119689e38SEric Dumazet 	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
118249a72dfbSAdam Langley 		goto clear_hash;
118349a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
118449a72dfbSAdam Langley 		goto clear_hash;
118549a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
118649a72dfbSAdam Langley 		goto clear_hash;
1187cf80e0e4SHerbert Xu 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
1188cf80e0e4SHerbert Xu 	if (crypto_ahash_final(req))
118949a72dfbSAdam Langley 		goto clear_hash;
119049a72dfbSAdam Langley 
119149a72dfbSAdam Langley 	tcp_put_md5sig_pool();
119249a72dfbSAdam Langley 	return 0;
119349a72dfbSAdam Langley 
119449a72dfbSAdam Langley clear_hash:
119549a72dfbSAdam Langley 	tcp_put_md5sig_pool();
119649a72dfbSAdam Langley clear_hash_noput:
119749a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
119849a72dfbSAdam Langley 	return 1;
119949a72dfbSAdam Langley }
120049a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1201cfb6eeb4SYOSHIFUJI Hideaki 
1202ba8e275aSEric Dumazet #endif
1203ba8e275aSEric Dumazet 
1204ff74e23fSEric Dumazet /* Called with rcu_read_lock() */
1205ba8e275aSEric Dumazet static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
12069ea88a15SDmitry Popov 				    const struct sk_buff *skb)
1207cfb6eeb4SYOSHIFUJI Hideaki {
1208ba8e275aSEric Dumazet #ifdef CONFIG_TCP_MD5SIG
1209cfb6eeb4SYOSHIFUJI Hideaki 	/*
1210cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1211cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1212cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1213cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1214cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1215cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1216cfb6eeb4SYOSHIFUJI Hideaki 	 */
1217cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1218cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1219eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1220cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1221cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1222cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1223cfb6eeb4SYOSHIFUJI Hideaki 
1224a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1225a915da9bSEric Dumazet 					  AF_INET);
12267d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1227cfb6eeb4SYOSHIFUJI Hideaki 
1228cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1229cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1230a2a385d6SEric Dumazet 		return false;
1231cfb6eeb4SYOSHIFUJI Hideaki 
1232cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1233c10d9310SEric Dumazet 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1234a2a385d6SEric Dumazet 		return true;
1235cfb6eeb4SYOSHIFUJI Hideaki 	}
1236cfb6eeb4SYOSHIFUJI Hideaki 
1237cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1238c10d9310SEric Dumazet 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1239a2a385d6SEric Dumazet 		return true;
1240cfb6eeb4SYOSHIFUJI Hideaki 	}
1241cfb6eeb4SYOSHIFUJI Hideaki 
1242cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1243cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1244cfb6eeb4SYOSHIFUJI Hideaki 	 */
124549a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1246cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
124739f8e58eSEric Dumazet 				      NULL, skb);
1248cfb6eeb4SYOSHIFUJI Hideaki 
1249cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
125072145a68SEric Dumazet 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
1251e87cc472SJoe Perches 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1252673d57e7SHarvey Harrison 				     &iph->saddr, ntohs(th->source),
1253673d57e7SHarvey Harrison 				     &iph->daddr, ntohs(th->dest),
1254e87cc472SJoe Perches 				     genhash ? " tcp_v4_calc_md5_hash failed"
1255e87cc472SJoe Perches 				     : "");
1256a2a385d6SEric Dumazet 		return true;
1257cfb6eeb4SYOSHIFUJI Hideaki 	}
1258a2a385d6SEric Dumazet 	return false;
1259cfb6eeb4SYOSHIFUJI Hideaki #endif
1260ba8e275aSEric Dumazet 	return false;
1261ba8e275aSEric Dumazet }
1262cfb6eeb4SYOSHIFUJI Hideaki 
1263b40cf18eSEric Dumazet static void tcp_v4_init_req(struct request_sock *req,
1264b40cf18eSEric Dumazet 			    const struct sock *sk_listener,
126516bea70aSOctavian Purdila 			    struct sk_buff *skb)
126616bea70aSOctavian Purdila {
126716bea70aSOctavian Purdila 	struct inet_request_sock *ireq = inet_rsk(req);
126816bea70aSOctavian Purdila 
126908d2cc3bSEric Dumazet 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
127008d2cc3bSEric Dumazet 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
127191ed1e66SPaolo Abeni 	ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
127216bea70aSOctavian Purdila }
127316bea70aSOctavian Purdila 
1274f964629eSEric Dumazet static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1275f964629eSEric Dumazet 					  struct flowi *fl,
12764396e461SSoheil Hassas Yeganeh 					  const struct request_sock *req)
1277d94e0417SOctavian Purdila {
12784396e461SSoheil Hassas Yeganeh 	return inet_csk_route_req(sk, &fl->u.ip4, req);
1279d94e0417SOctavian Purdila }
1280d94e0417SOctavian Purdila 
128172a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12821da177e4SLinus Torvalds 	.family		=	PF_INET,
12832e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
12845db92c99SOctavian Purdila 	.rtx_syn_ack	=	tcp_rtx_synack,
128560236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
128660236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12871da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
128872659eccSOctavian Purdila 	.syn_ack_timeout =	tcp_syn_ack_timeout,
12891da177e4SLinus Torvalds };
12901da177e4SLinus Torvalds 
1291b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
12922aec4a29SOctavian Purdila 	.mss_clamp	=	TCP_MSS_DEFAULT,
129316bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG
1294fd3a154aSEric Dumazet 	.req_md5_lookup	=	tcp_v4_md5_lookup,
1295e3afe7b7SJohn Dykstra 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1296b6332e6cSAndrew Morton #endif
129716bea70aSOctavian Purdila 	.init_req	=	tcp_v4_init_req,
1298fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES
1299fb7b37a7SOctavian Purdila 	.cookie_init_seq =	cookie_v4_init_sequence,
1300fb7b37a7SOctavian Purdila #endif
1301d94e0417SOctavian Purdila 	.route_req	=	tcp_v4_route_req,
130284b114b9SEric Dumazet 	.init_seq	=	tcp_v4_init_seq,
130384b114b9SEric Dumazet 	.init_ts_off	=	tcp_v4_init_ts_off,
1304d6274bd8SOctavian Purdila 	.send_synack	=	tcp_v4_send_synack,
130516bea70aSOctavian Purdila };
1306cfb6eeb4SYOSHIFUJI Hideaki 
13071da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
13081da177e4SLinus Torvalds {
13091da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1310511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
13111da177e4SLinus Torvalds 		goto drop;
13121da177e4SLinus Torvalds 
13131fb6f159SOctavian Purdila 	return tcp_conn_request(&tcp_request_sock_ops,
13141fb6f159SOctavian Purdila 				&tcp_request_sock_ipv4_ops, sk, skb);
13151da177e4SLinus Torvalds 
13161da177e4SLinus Torvalds drop:
13179caad864SEric Dumazet 	tcp_listendrop(sk);
13181da177e4SLinus Torvalds 	return 0;
13191da177e4SLinus Torvalds }
13204bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
13211da177e4SLinus Torvalds 
13221da177e4SLinus Torvalds 
13231da177e4SLinus Torvalds /*
13241da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
13251da177e4SLinus Torvalds  * now create the new socket.
13261da177e4SLinus Torvalds  */
13270c27171eSEric Dumazet struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
132860236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
13295e0724d0SEric Dumazet 				  struct dst_entry *dst,
13305e0724d0SEric Dumazet 				  struct request_sock *req_unhash,
13315e0724d0SEric Dumazet 				  bool *own_req)
13321da177e4SLinus Torvalds {
13332e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
13341da177e4SLinus Torvalds 	struct inet_sock *newinet;
13351da177e4SLinus Torvalds 	struct tcp_sock *newtp;
13361da177e4SLinus Torvalds 	struct sock *newsk;
1337cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1338cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1339cfb6eeb4SYOSHIFUJI Hideaki #endif
1340f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
13411da177e4SLinus Torvalds 
13421da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
13431da177e4SLinus Torvalds 		goto exit_overflow;
13441da177e4SLinus Torvalds 
13451da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
13461da177e4SLinus Torvalds 	if (!newsk)
1347093d2823SBalazs Scheidler 		goto exit_nonewsk;
13481da177e4SLinus Torvalds 
1349bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1350fae6ef87SNeal Cardwell 	inet_sk_rx_dst_set(newsk, skb);
13511da177e4SLinus Torvalds 
13521da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
13531da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
13542e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1355d1e559d0SEric Dumazet 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
1356d1e559d0SEric Dumazet 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
13576dd9a14eSDavid Ahern 	newsk->sk_bound_dev_if = ireq->ir_iif;
1358634fb979SEric Dumazet 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1359f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1360f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
13612e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1362463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1363eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
13644c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1365d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1366f6d8bd05SEric Dumazet 	if (inet_opt)
1367f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1368c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
13691da177e4SLinus Torvalds 
1370dfd25fffSEric Dumazet 	if (!dst) {
1371dfd25fffSEric Dumazet 		dst = inet_csk_route_child_sock(sk, newsk, req);
1372dfd25fffSEric Dumazet 		if (!dst)
13730e734419SDavid S. Miller 			goto put_and_exit;
1374dfd25fffSEric Dumazet 	} else {
1375dfd25fffSEric Dumazet 		/* syncookie case : see end of cookie_v4_check() */
1376dfd25fffSEric Dumazet 	}
13770e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
13780e734419SDavid S. Miller 
137981164413SDaniel Borkmann 	tcp_ca_openreq_child(newsk, dst);
138081164413SDaniel Borkmann 
13811da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
13823541f9e8SEric Dumazet 	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1383f5fff5dcSTom Quetchenbach 
13841da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
13851da177e4SLinus Torvalds 
1386cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1387cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1388a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1389a915da9bSEric Dumazet 				AF_INET);
139000db4124SIan Morris 	if (key) {
1391cfb6eeb4SYOSHIFUJI Hideaki 		/*
1392cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1393cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1394cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1395cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1396cfb6eeb4SYOSHIFUJI Hideaki 		 */
1397a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
13986797318eSIvan Delalande 			       AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
1399a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1400cfb6eeb4SYOSHIFUJI Hideaki 	}
1401cfb6eeb4SYOSHIFUJI Hideaki #endif
1402cfb6eeb4SYOSHIFUJI Hideaki 
14030e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
14040e734419SDavid S. Miller 		goto put_and_exit;
14055e0724d0SEric Dumazet 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1406805c4bc0SEric Dumazet 	if (*own_req)
140749a496c9SEric Dumazet 		tcp_move_syn(newtp, req);
14081da177e4SLinus Torvalds 
14091da177e4SLinus Torvalds 	return newsk;
14101da177e4SLinus Torvalds 
14111da177e4SLinus Torvalds exit_overflow:
1412c10d9310SEric Dumazet 	NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1413093d2823SBalazs Scheidler exit_nonewsk:
1414093d2823SBalazs Scheidler 	dst_release(dst);
14151da177e4SLinus Torvalds exit:
14169caad864SEric Dumazet 	tcp_listendrop(sk);
14171da177e4SLinus Torvalds 	return NULL;
14180e734419SDavid S. Miller put_and_exit:
1419e337e24dSChristoph Paasch 	inet_csk_prepare_forced_close(newsk);
1420e337e24dSChristoph Paasch 	tcp_done(newsk);
14210e734419SDavid S. Miller 	goto exit;
14221da177e4SLinus Torvalds }
14234bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
14241da177e4SLinus Torvalds 
/*
 * With CONFIG_SYN_COOKIES, segments that do not carry SYN are passed to
 * cookie_v4_check() and its result is returned; in every other case @sk
 * is returned unchanged.
 */
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v4_check(sk, skb);
#endif
	return sk;
}
14351da177e4SLinus Torvalds 
14361da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
1437e994b2f0SEric Dumazet  * here, unless it is a TCP_LISTEN socket.
14381da177e4SLinus Torvalds  *
14391da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
14401da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
14411da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
14421da177e4SLinus Torvalds  * held.
14431da177e4SLinus Torvalds  */
14441da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
14451da177e4SLinus Torvalds {
1446cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1447cfb6eeb4SYOSHIFUJI Hideaki 
14481da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
144992101b3bSDavid S. Miller 		struct dst_entry *dst = sk->sk_rx_dst;
1450404e0a8bSEric Dumazet 
1451404e0a8bSEric Dumazet 		sock_rps_save_rxhash(sk, skb);
14523d97379aSEric Dumazet 		sk_mark_napi_id(sk, skb);
1453404e0a8bSEric Dumazet 		if (dst) {
1454505fbcf0SEric Dumazet 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
145551456b29SIan Morris 			    !dst->ops->check(dst, 0)) {
145692101b3bSDavid S. Miller 				dst_release(dst);
145792101b3bSDavid S. Miller 				sk->sk_rx_dst = NULL;
145892101b3bSDavid S. Miller 			}
145992101b3bSDavid S. Miller 		}
1460e42e24c3SMatvejchikov Ilya 		tcp_rcv_established(sk, skb, tcp_hdr(skb));
14611da177e4SLinus Torvalds 		return 0;
14621da177e4SLinus Torvalds 	}
14631da177e4SLinus Torvalds 
146412e25e10SEric Dumazet 	if (tcp_checksum_complete(skb))
14651da177e4SLinus Torvalds 		goto csum_err;
14661da177e4SLinus Torvalds 
14671da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
1468079096f1SEric Dumazet 		struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1469079096f1SEric Dumazet 
14701da177e4SLinus Torvalds 		if (!nsk)
14711da177e4SLinus Torvalds 			goto discard;
14721da177e4SLinus Torvalds 		if (nsk != sk) {
1473cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1474cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
14751da177e4SLinus Torvalds 				goto reset;
1476cfb6eeb4SYOSHIFUJI Hideaki 			}
14771da177e4SLinus Torvalds 			return 0;
14781da177e4SLinus Torvalds 		}
1479ca55158cSEric Dumazet 	} else
1480bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1481ca55158cSEric Dumazet 
148272ab4a86SEric Dumazet 	if (tcp_rcv_state_process(sk, skb)) {
1483cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
14841da177e4SLinus Torvalds 		goto reset;
1485cfb6eeb4SYOSHIFUJI Hideaki 	}
14861da177e4SLinus Torvalds 	return 0;
14871da177e4SLinus Torvalds 
14881da177e4SLinus Torvalds reset:
1489cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
14901da177e4SLinus Torvalds discard:
14911da177e4SLinus Torvalds 	kfree_skb(skb);
14921da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
14931da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
14941da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
14951da177e4SLinus Torvalds 	 * but you have been warned.
14961da177e4SLinus Torvalds 	 */
14971da177e4SLinus Torvalds 	return 0;
14981da177e4SLinus Torvalds 
14991da177e4SLinus Torvalds csum_err:
1500c10d9310SEric Dumazet 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1501c10d9310SEric Dumazet 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
15021da177e4SLinus Torvalds 	goto discard;
15031da177e4SLinus Torvalds }
15044bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
15051da177e4SLinus Torvalds 
1506160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb)
150741063e9dSDavid S. Miller {
150841063e9dSDavid S. Miller 	const struct iphdr *iph;
150941063e9dSDavid S. Miller 	const struct tcphdr *th;
151041063e9dSDavid S. Miller 	struct sock *sk;
151141063e9dSDavid S. Miller 
151241063e9dSDavid S. Miller 	if (skb->pkt_type != PACKET_HOST)
1513160eb5a6SDavid S. Miller 		return;
151441063e9dSDavid S. Miller 
151545f00f99SEric Dumazet 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1516160eb5a6SDavid S. Miller 		return;
151741063e9dSDavid S. Miller 
151841063e9dSDavid S. Miller 	iph = ip_hdr(skb);
151945f00f99SEric Dumazet 	th = tcp_hdr(skb);
152041063e9dSDavid S. Miller 
152141063e9dSDavid S. Miller 	if (th->doff < sizeof(struct tcphdr) / 4)
1522160eb5a6SDavid S. Miller 		return;
152341063e9dSDavid S. Miller 
152445f00f99SEric Dumazet 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
152541063e9dSDavid S. Miller 				       iph->saddr, th->source,
15267011d085SVijay Subramanian 				       iph->daddr, ntohs(th->dest),
15273fa6f616SDavid Ahern 				       skb->skb_iif, inet_sdif(skb));
152841063e9dSDavid S. Miller 	if (sk) {
152941063e9dSDavid S. Miller 		skb->sk = sk;
153041063e9dSDavid S. Miller 		skb->destructor = sock_edemux;
1531f7e4eb03SEric Dumazet 		if (sk_fullsock(sk)) {
1532d0c294c5SMichal Kubeček 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1533505fbcf0SEric Dumazet 
153441063e9dSDavid S. Miller 			if (dst)
153541063e9dSDavid S. Miller 				dst = dst_check(dst, 0);
153692101b3bSDavid S. Miller 			if (dst &&
1537505fbcf0SEric Dumazet 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
153841063e9dSDavid S. Miller 				skb_dst_set_noref(skb, dst);
153941063e9dSDavid S. Miller 		}
154041063e9dSDavid S. Miller 	}
154141063e9dSDavid S. Miller }
154241063e9dSDavid S. Miller 
1543c9c33212SEric Dumazet bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
1544c9c33212SEric Dumazet {
1545c9c33212SEric Dumazet 	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
1546c9c33212SEric Dumazet 
1547c9c33212SEric Dumazet 	/* Only socket owner can try to collapse/prune rx queues
1548c9c33212SEric Dumazet 	 * to reduce memory overhead, so add a little headroom here.
1549c9c33212SEric Dumazet 	 * Few sockets backlog are possibly concurrently non empty.
1550c9c33212SEric Dumazet 	 */
1551c9c33212SEric Dumazet 	limit += 64*1024;
1552c9c33212SEric Dumazet 
1553c9c33212SEric Dumazet 	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
1554c9c33212SEric Dumazet 	 * we can fix skb->truesize to its real value to avoid future drops.
1555c9c33212SEric Dumazet 	 * This is valid because skb is not yet charged to the socket.
1556c9c33212SEric Dumazet 	 * It has been noticed pure SACK packets were sometimes dropped
1557c9c33212SEric Dumazet 	 * (if cooked by drivers without copybreak feature).
1558c9c33212SEric Dumazet 	 */
155960b1af33SEric Dumazet 	skb_condense(skb);
1560c9c33212SEric Dumazet 
1561c9c33212SEric Dumazet 	if (unlikely(sk_add_backlog(sk, skb, limit))) {
1562c9c33212SEric Dumazet 		bh_unlock_sock(sk);
1563c9c33212SEric Dumazet 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
1564c9c33212SEric Dumazet 		return true;
1565c9c33212SEric Dumazet 	}
1566c9c33212SEric Dumazet 	return false;
1567c9c33212SEric Dumazet }
1568c9c33212SEric Dumazet EXPORT_SYMBOL(tcp_add_backlog);
1569c9c33212SEric Dumazet 
1570ac6e7800SEric Dumazet int tcp_filter(struct sock *sk, struct sk_buff *skb)
1571ac6e7800SEric Dumazet {
1572ac6e7800SEric Dumazet 	struct tcphdr *th = (struct tcphdr *)skb->data;
1573ac6e7800SEric Dumazet 	unsigned int eaten = skb->len;
1574ac6e7800SEric Dumazet 	int err;
1575ac6e7800SEric Dumazet 
1576ac6e7800SEric Dumazet 	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1577ac6e7800SEric Dumazet 	if (!err) {
1578ac6e7800SEric Dumazet 		eaten -= skb->len;
1579ac6e7800SEric Dumazet 		TCP_SKB_CB(skb)->end_seq -= eaten;
1580ac6e7800SEric Dumazet 	}
1581ac6e7800SEric Dumazet 	return err;
1582ac6e7800SEric Dumazet }
1583ac6e7800SEric Dumazet EXPORT_SYMBOL(tcp_filter);
1584ac6e7800SEric Dumazet 
15851da177e4SLinus Torvalds /*
15861da177e4SLinus Torvalds  *	From tcp_input.c
15871da177e4SLinus Torvalds  */
15881da177e4SLinus Torvalds 
15891da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
15901da177e4SLinus Torvalds {
15913b24d854SEric Dumazet 	struct net *net = dev_net(skb->dev);
15923fa6f616SDavid Ahern 	int sdif = inet_sdif(skb);
1593eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1594cf533ea5SEric Dumazet 	const struct tcphdr *th;
15953b24d854SEric Dumazet 	bool refcounted;
15961da177e4SLinus Torvalds 	struct sock *sk;
15971da177e4SLinus Torvalds 	int ret;
15981da177e4SLinus Torvalds 
15991da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
16001da177e4SLinus Torvalds 		goto discard_it;
16011da177e4SLinus Torvalds 
16021da177e4SLinus Torvalds 	/* Count it even if it's bad */
160390bbcc60SEric Dumazet 	__TCP_INC_STATS(net, TCP_MIB_INSEGS);
16041da177e4SLinus Torvalds 
16051da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
16061da177e4SLinus Torvalds 		goto discard_it;
16071da177e4SLinus Torvalds 
1608ea1627c2SEric Dumazet 	th = (const struct tcphdr *)skb->data;
16091da177e4SLinus Torvalds 
1610ea1627c2SEric Dumazet 	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
16111da177e4SLinus Torvalds 		goto bad_packet;
16121da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
16131da177e4SLinus Torvalds 		goto discard_it;
16141da177e4SLinus Torvalds 
16151da177e4SLinus Torvalds 	/* An explanation is required here, I think.
16161da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1617caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
16181da177e4SLinus Torvalds 	 * So, we defer the checks. */
1619ed70fcfcSTom Herbert 
1620ed70fcfcSTom Herbert 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
16216a5dc9e5SEric Dumazet 		goto csum_error;
16221da177e4SLinus Torvalds 
1623ea1627c2SEric Dumazet 	th = (const struct tcphdr *)skb->data;
1624eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
1625971f10ecSEric Dumazet 	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1626971f10ecSEric Dumazet 	 * barrier() makes sure compiler wont play fool^Waliasing games.
1627971f10ecSEric Dumazet 	 */
1628971f10ecSEric Dumazet 	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1629971f10ecSEric Dumazet 		sizeof(struct inet_skb_parm));
1630971f10ecSEric Dumazet 	barrier();
1631971f10ecSEric Dumazet 
16321da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
16331da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
16341da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
16351da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1636e11ecddfSEric Dumazet 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
163704317dafSEric Dumazet 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1638b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
16391da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
164098aaa913SMike Maloney 	TCP_SKB_CB(skb)->has_rxtstamp =
164198aaa913SMike Maloney 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
16421da177e4SLinus Torvalds 
16434bdc3d66SEric Dumazet lookup:
1644a583636aSCraig Gallek 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
16453fa6f616SDavid Ahern 			       th->dest, sdif, &refcounted);
16461da177e4SLinus Torvalds 	if (!sk)
16471da177e4SLinus Torvalds 		goto no_tcp_socket;
16481da177e4SLinus Torvalds 
1649bb134d5dSEric Dumazet process:
1650bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1651bb134d5dSEric Dumazet 		goto do_time_wait;
1652bb134d5dSEric Dumazet 
1653079096f1SEric Dumazet 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1654079096f1SEric Dumazet 		struct request_sock *req = inet_reqsk(sk);
16557716682cSEric Dumazet 		struct sock *nsk;
1656079096f1SEric Dumazet 
1657079096f1SEric Dumazet 		sk = req->rsk_listener;
165872923555SEric Dumazet 		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1659e65c332dSEric Dumazet 			sk_drops_add(sk, skb);
166072923555SEric Dumazet 			reqsk_put(req);
166172923555SEric Dumazet 			goto discard_it;
166272923555SEric Dumazet 		}
16637716682cSEric Dumazet 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
1664f03f2e15SEric Dumazet 			inet_csk_reqsk_queue_drop_and_put(sk, req);
16654bdc3d66SEric Dumazet 			goto lookup;
16664bdc3d66SEric Dumazet 		}
16673b24d854SEric Dumazet 		/* We own a reference on the listener, increase it again
16683b24d854SEric Dumazet 		 * as we might lose it too soon.
16693b24d854SEric Dumazet 		 */
16707716682cSEric Dumazet 		sock_hold(sk);
16713b24d854SEric Dumazet 		refcounted = true;
1672*1f3b359fSEric Dumazet 		nsk = NULL;
1673*1f3b359fSEric Dumazet 		if (!tcp_filter(sk, skb))
16747716682cSEric Dumazet 			nsk = tcp_check_req(sk, skb, req, false);
1675079096f1SEric Dumazet 		if (!nsk) {
1676079096f1SEric Dumazet 			reqsk_put(req);
16777716682cSEric Dumazet 			goto discard_and_relse;
1678079096f1SEric Dumazet 		}
1679079096f1SEric Dumazet 		if (nsk == sk) {
1680079096f1SEric Dumazet 			reqsk_put(req);
1681079096f1SEric Dumazet 		} else if (tcp_child_process(sk, nsk, skb)) {
1682079096f1SEric Dumazet 			tcp_v4_send_reset(nsk, skb);
16837716682cSEric Dumazet 			goto discard_and_relse;
1684079096f1SEric Dumazet 		} else {
16857716682cSEric Dumazet 			sock_put(sk);
1686079096f1SEric Dumazet 			return 0;
1687079096f1SEric Dumazet 		}
1688079096f1SEric Dumazet 	}
16896cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
169002a1d6e7SEric Dumazet 		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1691d218d111SStephen Hemminger 		goto discard_and_relse;
16926cce09f8SEric Dumazet 	}
1693d218d111SStephen Hemminger 
16941da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
16951da177e4SLinus Torvalds 		goto discard_and_relse;
16969ea88a15SDmitry Popov 
16979ea88a15SDmitry Popov 	if (tcp_v4_inbound_md5_hash(sk, skb))
16989ea88a15SDmitry Popov 		goto discard_and_relse;
16999ea88a15SDmitry Popov 
1700b59c2701SPatrick McHardy 	nf_reset(skb);
17011da177e4SLinus Torvalds 
1702ac6e7800SEric Dumazet 	if (tcp_filter(sk, skb))
17031da177e4SLinus Torvalds 		goto discard_and_relse;
1704ac6e7800SEric Dumazet 	th = (const struct tcphdr *)skb->data;
1705ac6e7800SEric Dumazet 	iph = ip_hdr(skb);
17061da177e4SLinus Torvalds 
17071da177e4SLinus Torvalds 	skb->dev = NULL;
17081da177e4SLinus Torvalds 
1709e994b2f0SEric Dumazet 	if (sk->sk_state == TCP_LISTEN) {
1710e994b2f0SEric Dumazet 		ret = tcp_v4_do_rcv(sk, skb);
1711e994b2f0SEric Dumazet 		goto put_and_return;
1712e994b2f0SEric Dumazet 	}
1713e994b2f0SEric Dumazet 
1714e994b2f0SEric Dumazet 	sk_incoming_cpu_update(sk);
1715e994b2f0SEric Dumazet 
1716c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
1717a44d6eacSMartin KaFai Lau 	tcp_segs_in(tcp_sk(sk), skb);
17181da177e4SLinus Torvalds 	ret = 0;
17191da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
17201da177e4SLinus Torvalds 		ret = tcp_v4_do_rcv(sk, skb);
1721c9c33212SEric Dumazet 	} else if (tcp_add_backlog(sk, skb)) {
17226b03a53aSZhu Yi 		goto discard_and_relse;
17236b03a53aSZhu Yi 	}
17241da177e4SLinus Torvalds 	bh_unlock_sock(sk);
17251da177e4SLinus Torvalds 
1726e994b2f0SEric Dumazet put_and_return:
17273b24d854SEric Dumazet 	if (refcounted)
17281da177e4SLinus Torvalds 		sock_put(sk);
17291da177e4SLinus Torvalds 
17301da177e4SLinus Torvalds 	return ret;
17311da177e4SLinus Torvalds 
17321da177e4SLinus Torvalds no_tcp_socket:
17331da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
17341da177e4SLinus Torvalds 		goto discard_it;
17351da177e4SLinus Torvalds 
173612e25e10SEric Dumazet 	if (tcp_checksum_complete(skb)) {
17376a5dc9e5SEric Dumazet csum_error:
173890bbcc60SEric Dumazet 		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
17391da177e4SLinus Torvalds bad_packet:
174090bbcc60SEric Dumazet 		__TCP_INC_STATS(net, TCP_MIB_INERRS);
17411da177e4SLinus Torvalds 	} else {
1742cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
17431da177e4SLinus Torvalds 	}
17441da177e4SLinus Torvalds 
17451da177e4SLinus Torvalds discard_it:
17461da177e4SLinus Torvalds 	/* Discard frame. */
17471da177e4SLinus Torvalds 	kfree_skb(skb);
17481da177e4SLinus Torvalds 	return 0;
17491da177e4SLinus Torvalds 
17501da177e4SLinus Torvalds discard_and_relse:
1751532182cdSEric Dumazet 	sk_drops_add(sk, skb);
17523b24d854SEric Dumazet 	if (refcounted)
17531da177e4SLinus Torvalds 		sock_put(sk);
17541da177e4SLinus Torvalds 	goto discard_it;
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds do_time_wait:
17571da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
17589469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
17591da177e4SLinus Torvalds 		goto discard_it;
17601da177e4SLinus Torvalds 	}
17611da177e4SLinus Torvalds 
17626a5dc9e5SEric Dumazet 	if (tcp_checksum_complete(skb)) {
17636a5dc9e5SEric Dumazet 		inet_twsk_put(inet_twsk(sk));
17646a5dc9e5SEric Dumazet 		goto csum_error;
17651da177e4SLinus Torvalds 	}
17669469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
17671da177e4SLinus Torvalds 	case TCP_TW_SYN: {
1768c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1769a583636aSCraig Gallek 							&tcp_hashinfo, skb,
1770a583636aSCraig Gallek 							__tcp_hdrlen(th),
1771da5e3630STom Herbert 							iph->saddr, th->source,
1772eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
17733fa6f616SDavid Ahern 							inet_iif(skb),
17743fa6f616SDavid Ahern 							sdif);
17751da177e4SLinus Torvalds 		if (sk2) {
1776dbe7faa4SEric Dumazet 			inet_twsk_deschedule_put(inet_twsk(sk));
17771da177e4SLinus Torvalds 			sk = sk2;
17783b24d854SEric Dumazet 			refcounted = false;
17791da177e4SLinus Torvalds 			goto process;
17801da177e4SLinus Torvalds 		}
17811da177e4SLinus Torvalds 		/* Fall through to ACK */
17821da177e4SLinus Torvalds 	}
17831da177e4SLinus Torvalds 	case TCP_TW_ACK:
17841da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
17851da177e4SLinus Torvalds 		break;
17861da177e4SLinus Torvalds 	case TCP_TW_RST:
1787271c3b9bSFlorian Westphal 		tcp_v4_send_reset(sk, skb);
1788271c3b9bSFlorian Westphal 		inet_twsk_deschedule_put(inet_twsk(sk));
1789271c3b9bSFlorian Westphal 		goto discard_it;
17901da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
17911da177e4SLinus Torvalds 	}
17921da177e4SLinus Torvalds 	goto discard_it;
17931da177e4SLinus Torvalds }
17941da177e4SLinus Torvalds 
/* Time-wait socket operations for TCP: the per-object allocation size and
 * the twsk_unique / twsk_destructor callbacks.  tcp_prot.twsk_prot points
 * at this table.
 */
1795ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1796ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1797ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1798ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1799ccb7c410SDavid S. Miller };
18001da177e4SLinus Torvalds 
180163d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
18025d299f3dSEric Dumazet {
18035d299f3dSEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
18045d299f3dSEric Dumazet 
18055037e9efSEric Dumazet 	if (dst && dst_hold_safe(dst)) {
18065d299f3dSEric Dumazet 		sk->sk_rx_dst = dst;
18075d299f3dSEric Dumazet 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
18085d299f3dSEric Dumazet 	}
1809ca777effSEric Dumazet }
181063d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set);
18115d299f3dSEric Dumazet 
/* Address-family specific connection-socket operations for TCP over IPv4.
 * tcp_v4_init_sock() installs this table as icsk_af_ops.  The compat
 * socket-option hooks are only present when CONFIG_COMPAT is enabled.
 */
18123b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
18131da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
18141da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
181532519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
18165d299f3dSEric Dumazet 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
18171da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
18181da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
18191da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
18201da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
18211da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1822543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1823543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
18243fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
18253fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
18263fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
18273fdadf7dSDmitry Mishin #endif
18284fab9071SNeal Cardwell 	.mtu_reduced	   = tcp_v4_mtu_reduced,
18291da177e4SLinus Torvalds };
18304bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
18311da177e4SLinus Torvalds 
/* IPv4 TCP-MD5 signature hooks (key lookup, hash over an skb, and key
 * parsing).  Only built with CONFIG_TCP_MD5SIG; tcp_v4_init_sock()
 * installs it as the socket's af_specific table.
 */
1832cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1833b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1834cfb6eeb4SYOSHIFUJI Hideaki 	.md5_lookup		= tcp_v4_md5_lookup,
183549a72dfbSAdam Langley 	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1836cfb6eeb4SYOSHIFUJI Hideaki 	.md5_parse		= tcp_v4_parse_md5_keys,
1837cfb6eeb4SYOSHIFUJI Hideaki };
1838b6332e6cSAndrew Morton #endif
1839cfb6eeb4SYOSHIFUJI Hideaki 
18401da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
18411da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
18421da177e4SLinus Torvalds  */
18431da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
18441da177e4SLinus Torvalds {
18456687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
18461da177e4SLinus Torvalds 
1847900f65d3SNeal Cardwell 	tcp_init_sock(sk);
18481da177e4SLinus Torvalds 
18498292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1850900f65d3SNeal Cardwell 
1851cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1852ac807fa8SDavid S. Miller 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1853cfb6eeb4SYOSHIFUJI Hideaki #endif
18541da177e4SLinus Torvalds 
18551da177e4SLinus Torvalds 	return 0;
18561da177e4SLinus Torvalds }
18571da177e4SLinus Torvalds 
/* Protocol destroy hook for a TCP socket (installed as tcp_prot.destroy).
 *
 * Stops the transmit timers, releases congestion-control and ULP state,
 * purges the write and out-of-order queues, frees MD5 keys (when
 * CONFIG_TCP_MD5SIG is set), drops the bind bucket reference, frees any
 * pending fastopen request and saved SYN, and finally decrements the
 * allocated-sockets counter.
 */
18587d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
18591da177e4SLinus Torvalds {
18601da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
18611da177e4SLinus Torvalds 
18621da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
18631da177e4SLinus Torvalds 
18646687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1865317a76f9SStephen Hemminger 
1866734942ccSDave Watson 	tcp_cleanup_ulp(sk);
1867734942ccSDave Watson 
18681da177e4SLinus Torvalds 	/* Clean up the write buffer. */
1869fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
18701da177e4SLinus Torvalds 
1871cf1ef3f0SWei Wang 	/* Check if we want to disable active TFO */
1872cf1ef3f0SWei Wang 	tcp_fastopen_active_disable_ofo_check(sk);
1873cf1ef3f0SWei Wang 
18741da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
18759f5afeaeSYaogong Wang 	skb_rbtree_purge(&tp->out_of_order_queue);
18761da177e4SLinus Torvalds 
1877cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1878cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1879cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1880a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
		/* The info block itself is released via kfree_rcu(). */
1881a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
1882cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1883cfb6eeb4SYOSHIFUJI Hideaki 	}
1884cfb6eeb4SYOSHIFUJI Hideaki #endif
1885cfb6eeb4SYOSHIFUJI Hideaki 
18861da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1887463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1888ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
18891da177e4SLinus Torvalds 
	/* A fastopen request sock must not still be attached at this point. */
189000db4124SIan Morris 	BUG_ON(tp->fastopen_rsk);
1891435cf559SWilliam Allen Simpson 
1892cf60af03SYuchung Cheng 	/* If socket is aborted during connect operation */
1893cf60af03SYuchung Cheng 	tcp_free_fastopen_req(tp);
1894cd8ae852SEric Dumazet 	tcp_saved_syn_free(tp);
1895cf60af03SYuchung Cheng 
1896180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
18971da177e4SLinus Torvalds }
18981da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
18991da177e4SLinus Torvalds 
19001da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
19011da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
19021da177e4SLinus Torvalds 
1903a8b690f9STom Herbert /*
1904a8b690f9STom Herbert  * Get the next listener socket following cur.  If cur is NULL, get the
1905a8b690f9STom Herbert  * first socket starting from the bucket given in st->bucket; when
1906a8b690f9STom Herbert  * st->bucket is zero the very first socket in the hash table is returned.
1907a8b690f9STom Herbert  */
/*
 * Only sockets in the seq_file's network namespace and of family
 * st->family are returned.  When a socket is returned, the lock of bucket
 * st->bucket is left held (tcp_seq_stop() or a later call releases it).
 * When cur is non-NULL no lock is taken here, so the caller is expected to
 * still hold that bucket's lock from the call that returned cur.
 * Returns NULL, with no lock held, once every bucket has been scanned.
 */
19081da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
19091da177e4SLinus Torvalds {
19101da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1911a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19123b24d854SEric Dumazet 	struct inet_listen_hashbucket *ilb;
19133b24d854SEric Dumazet 	struct sock *sk = cur;
19141da177e4SLinus Torvalds 
19151da177e4SLinus Torvalds 	if (!sk) {
		/* Start (or restart) at the head of bucket st->bucket. */
19163b24d854SEric Dumazet get_head:
1917a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
19189652dc2eSEric Dumazet 		spin_lock(&ilb->lock);
19193b24d854SEric Dumazet 		sk = sk_head(&ilb->head);
1920a8b690f9STom Herbert 		st->offset = 0;
19211da177e4SLinus Torvalds 		goto get_sk;
19221da177e4SLinus Torvalds 	}
	/* Continuing inside the current bucket: account for one more entry. */
19235caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
19241da177e4SLinus Torvalds 	++st->num;
1925a8b690f9STom Herbert 	++st->offset;
19261da177e4SLinus Torvalds 
19273b24d854SEric Dumazet 	sk = sk_next(sk);
19281da177e4SLinus Torvalds get_sk:
19293b24d854SEric Dumazet 	sk_for_each_from(sk) {
19308475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
19318475ef9fSPavel Emelyanov 			continue;
19323b24d854SEric Dumazet 		if (sk->sk_family == st->family)
19333b24d854SEric Dumazet 			return sk;
19341da177e4SLinus Torvalds 	}
	/* Bucket exhausted: release it and move on to the next one, if any. */
19359652dc2eSEric Dumazet 	spin_unlock(&ilb->lock);
1936a8b690f9STom Herbert 	st->offset = 0;
19373b24d854SEric Dumazet 	if (++st->bucket < INET_LHTABLE_SIZE)
19383b24d854SEric Dumazet 		goto get_head;
19393b24d854SEric Dumazet 	return NULL;
19401da177e4SLinus Torvalds }
19411da177e4SLinus Torvalds 
19421da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
19431da177e4SLinus Torvalds {
1944a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1945a8b690f9STom Herbert 	void *rc;
1946a8b690f9STom Herbert 
1947a8b690f9STom Herbert 	st->bucket = 0;
1948a8b690f9STom Herbert 	st->offset = 0;
1949a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
19501da177e4SLinus Torvalds 
19511da177e4SLinus Torvalds 	while (rc && *pos) {
19521da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
19531da177e4SLinus Torvalds 		--*pos;
19541da177e4SLinus Torvalds 	}
19551da177e4SLinus Torvalds 	return rc;
19561da177e4SLinus Torvalds }
19571da177e4SLinus Torvalds 
/* True when the established-hash chain for bucket st->bucket is empty. */
195805dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st)
19596eac5604SAndi Kleen {
196005dbc7b5SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
19616eac5604SAndi Kleen }
19626eac5604SAndi Kleen 
1963a8b690f9STom Herbert /*
1964a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
1965a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
1966a8b690f9STom Herbert  */
/*
 * Only sockets of family st->family in the seq_file's network namespace
 * match.  On success the socket is returned with the lock for bucket
 * st->bucket still held (the "goto out" path skips the unlock).
 * Returns NULL, with no lock held, when no bucket up to ehash_mask has a
 * matching socket.  st->offset is reset to 0 on entry.
 */
19671da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
19681da177e4SLinus Torvalds {
19691da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1970a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19711da177e4SLinus Torvalds 	void *rc = NULL;
19721da177e4SLinus Torvalds 
1973a8b690f9STom Herbert 	st->offset = 0;
1974a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
19751da177e4SLinus Torvalds 		struct sock *sk;
19763ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
19779db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
19781da177e4SLinus Torvalds 
19796eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
19806eac5604SAndi Kleen 		if (empty_bucket(st))
19816eac5604SAndi Kleen 			continue;
19826eac5604SAndi Kleen 
19839db66bdcSEric Dumazet 		spin_lock_bh(lock);
19843ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1985f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
1986878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
19871da177e4SLinus Torvalds 				continue;
19881da177e4SLinus Torvalds 			}
19891da177e4SLinus Torvalds 			rc = sk;
			/* Leave with the bucket lock held. */
19901da177e4SLinus Torvalds 			goto out;
19911da177e4SLinus Torvalds 		}
19929db66bdcSEric Dumazet 		spin_unlock_bh(lock);
19931da177e4SLinus Torvalds 	}
19941da177e4SLinus Torvalds out:
19951da177e4SLinus Torvalds 	return rc;
19961da177e4SLinus Torvalds }
19971da177e4SLinus Torvalds 
/*
 * Advance from established socket cur to the next matching one.
 *
 * Bumps st->num and st->offset, then looks further along the chain that
 * cur is on.  If that chain has no further match, the lock for bucket
 * st->bucket is released, st->bucket is advanced, and the search continues
 * via established_get_first().  No lock is taken here before walking the
 * chain, so the caller is expected to hold the lock for st->bucket.
 */
19981da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
19991da177e4SLinus Torvalds {
20001da177e4SLinus Torvalds 	struct sock *sk = cur;
20013ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
20021da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2003a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
20041da177e4SLinus Torvalds 
20051da177e4SLinus Torvalds 	++st->num;
2006a8b690f9STom Herbert 	++st->offset;
20071da177e4SLinus Torvalds 
20083ab5aee7SEric Dumazet 	sk = sk_nulls_next(sk);
20091da177e4SLinus Torvalds 
20103ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
2011878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
201205dbc7b5SEric Dumazet 			return sk;
20131da177e4SLinus Torvalds 	}
20141da177e4SLinus Torvalds 
	/* Chain exhausted: drop this bucket's lock and scan from the next. */
201505dbc7b5SEric Dumazet 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
201605dbc7b5SEric Dumazet 	++st->bucket;
201705dbc7b5SEric Dumazet 	return established_get_first(seq);
20181da177e4SLinus Torvalds }
20191da177e4SLinus Torvalds 
20201da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
20211da177e4SLinus Torvalds {
2022a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2023a8b690f9STom Herbert 	void *rc;
2024a8b690f9STom Herbert 
2025a8b690f9STom Herbert 	st->bucket = 0;
2026a8b690f9STom Herbert 	rc = established_get_first(seq);
20271da177e4SLinus Torvalds 
20281da177e4SLinus Torvalds 	while (rc && pos) {
20291da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
20301da177e4SLinus Torvalds 		--pos;
20311da177e4SLinus Torvalds 	}
20321da177e4SLinus Torvalds 	return rc;
20331da177e4SLinus Torvalds }
20341da177e4SLinus Torvalds 
20351da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
20361da177e4SLinus Torvalds {
20371da177e4SLinus Torvalds 	void *rc;
20381da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
20391da177e4SLinus Torvalds 
20401da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20411da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
20421da177e4SLinus Torvalds 
20431da177e4SLinus Torvalds 	if (!rc) {
20441da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
20451da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
20461da177e4SLinus Torvalds 	}
20471da177e4SLinus Torvalds 
20481da177e4SLinus Torvalds 	return rc;
20491da177e4SLinus Torvalds }
20501da177e4SLinus Torvalds 
2051a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2052a8b690f9STom Herbert {
2053a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2054a8b690f9STom Herbert 	int offset = st->offset;
2055a8b690f9STom Herbert 	int orig_num = st->num;
2056a8b690f9STom Herbert 	void *rc = NULL;
2057a8b690f9STom Herbert 
2058a8b690f9STom Herbert 	switch (st->state) {
2059a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2060a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2061a8b690f9STom Herbert 			break;
2062a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2063a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2064a8b690f9STom Herbert 		while (offset-- && rc)
2065a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2066a8b690f9STom Herbert 		if (rc)
2067a8b690f9STom Herbert 			break;
2068a8b690f9STom Herbert 		st->bucket = 0;
206905dbc7b5SEric Dumazet 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2070a8b690f9STom Herbert 		/* Fallthrough */
2071a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2072a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2073a8b690f9STom Herbert 			break;
2074a8b690f9STom Herbert 		rc = established_get_first(seq);
2075a8b690f9STom Herbert 		while (offset-- && rc)
2076a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2077a8b690f9STom Herbert 	}
2078a8b690f9STom Herbert 
2079a8b690f9STom Herbert 	st->num = orig_num;
2080a8b690f9STom Herbert 
2081a8b690f9STom Herbert 	return rc;
2082a8b690f9STom Herbert }
2083a8b690f9STom Herbert 
20841da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
20851da177e4SLinus Torvalds {
20861da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2087a8b690f9STom Herbert 	void *rc;
2088a8b690f9STom Herbert 
2089a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2090a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2091a8b690f9STom Herbert 		if (rc)
2092a8b690f9STom Herbert 			goto out;
2093a8b690f9STom Herbert 	}
2094a8b690f9STom Herbert 
20951da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20961da177e4SLinus Torvalds 	st->num = 0;
2097a8b690f9STom Herbert 	st->bucket = 0;
2098a8b690f9STom Herbert 	st->offset = 0;
2099a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2100a8b690f9STom Herbert 
2101a8b690f9STom Herbert out:
2102a8b690f9STom Herbert 	st->last_pos = *pos;
2103a8b690f9STom Herbert 	return rc;
21041da177e4SLinus Torvalds }
21051da177e4SLinus Torvalds 
21061da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
21071da177e4SLinus Torvalds {
2108a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
21091da177e4SLinus Torvalds 	void *rc = NULL;
21101da177e4SLinus Torvalds 
21111da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
21121da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
21131da177e4SLinus Torvalds 		goto out;
21141da177e4SLinus Torvalds 	}
21151da177e4SLinus Torvalds 
21161da177e4SLinus Torvalds 	switch (st->state) {
21171da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21181da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
21191da177e4SLinus Torvalds 		if (!rc) {
21201da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2121a8b690f9STom Herbert 			st->bucket = 0;
2122a8b690f9STom Herbert 			st->offset = 0;
21231da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
21241da177e4SLinus Torvalds 		}
21251da177e4SLinus Torvalds 		break;
21261da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
21271da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
21281da177e4SLinus Torvalds 		break;
21291da177e4SLinus Torvalds 	}
21301da177e4SLinus Torvalds out:
21311da177e4SLinus Torvalds 	++*pos;
2132a8b690f9STom Herbert 	st->last_pos = *pos;
21331da177e4SLinus Torvalds 	return rc;
21341da177e4SLinus Torvalds }
21351da177e4SLinus Torvalds 
21361da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
21371da177e4SLinus Torvalds {
21381da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
21391da177e4SLinus Torvalds 
21401da177e4SLinus Torvalds 	switch (st->state) {
21411da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21421da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
21439652dc2eSEric Dumazet 			spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
21441da177e4SLinus Torvalds 		break;
21451da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
21461da177e4SLinus Torvalds 		if (v)
21479db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21481da177e4SLinus Torvalds 		break;
21491da177e4SLinus Torvalds 	}
21501da177e4SLinus Torvalds }
21511da177e4SLinus Torvalds 
215273cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
21531da177e4SLinus Torvalds {
2154d9dda78bSAl Viro 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
21551da177e4SLinus Torvalds 	struct tcp_iter_state *s;
215652d6f3f1SDenis V. Lunev 	int err;
21571da177e4SLinus Torvalds 
215852d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
215952d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
216052d6f3f1SDenis V. Lunev 	if (err < 0)
216152d6f3f1SDenis V. Lunev 		return err;
2162f40c8174SDaniel Lezcano 
216352d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
21641da177e4SLinus Torvalds 	s->family		= afinfo->family;
2165a8b690f9STom Herbert 	s->last_pos		= 0;
2166f40c8174SDaniel Lezcano 	return 0;
2167f40c8174SDaniel Lezcano }
216873cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2169f40c8174SDaniel Lezcano 
21706f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
21711da177e4SLinus Torvalds {
21721da177e4SLinus Torvalds 	int rc = 0;
21731da177e4SLinus Torvalds 	struct proc_dir_entry *p;
21741da177e4SLinus Torvalds 
21759427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
21769427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
21779427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
21789427c4b3SDenis V. Lunev 
217984841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
218073cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
218184841c3cSDenis V. Lunev 	if (!p)
21821da177e4SLinus Torvalds 		rc = -ENOMEM;
21831da177e4SLinus Torvalds 	return rc;
21841da177e4SLinus Torvalds }
21854bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
21861da177e4SLinus Torvalds 
/* Remove the proc entry named afinfo->name from net->proc_net. */
21876f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
21881da177e4SLinus Torvalds {
2189ece31ffdSGao feng 	remove_proc_entry(afinfo->name, net->proc_net);
21901da177e4SLinus Torvalds }
21914bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
21921da177e4SLinus Torvalds 
/* Print one line for a request socket into seq_file f; i is the entry
 * number shown in the first column.
 *
 * The state column is always TCP_SYN_RECV, both queue columns are 0, the
 * timer column is 1 and the remaining time is derived from
 * rsk_timer.expires.  The uid shown is that of the listener the request
 * belongs to.
 */
2193d4f06873SEric Dumazet static void get_openreq4(const struct request_sock *req,
2194aa3a0c8cEric Dumazet 			 struct seq_file *f, int i)
21951da177e4SLinus Torvalds {
21962e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
2197fa76ce73SEric Dumazet 	long delta = req->rsk_timer.expires - jiffies;
21981da177e4SLinus Torvalds 
21995e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2200652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
22011da177e4SLinus Torvalds 		i,
2202634fb979SEric Dumazet 		ireq->ir_loc_addr,
2203d4f06873SEric Dumazet 		ireq->ir_num,
2204634fb979SEric Dumazet 		ireq->ir_rmt_addr,
2205634fb979SEric Dumazet 		ntohs(ireq->ir_rmt_port),
22061da177e4SLinus Torvalds 		TCP_SYN_RECV,
22071da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
22081da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
2209a399a805SEric Dumazet 		jiffies_delta_to_clock_t(delta),
2210e6c022a4SEric Dumazet 		req->num_timeout,
2211aa3a0c8cEric Dumazet 		from_kuid_munged(seq_user_ns(f),
2212aa3a0c8cEric Dumazet 				 sock_i_uid(req->rsk_listener)),
22131da177e4SLinus Torvalds 		0,  /* non standard timer */
22141da177e4SLinus Torvalds 		0, /* open_requests have no inode */
2215d4f06873SEric Dumazet 		0,
2216652586dfSTetsuo Handa 		req);
22171da177e4SLinus Torvalds }
22181da177e4SLinus Torvalds 
/* Print one line for a full TCP socket into seq_file f; i is the entry
 * number shown in the first column.
 *
 * Timer column values, as chosen below:
 *   1 - retransmit, reordering-timeout or loss-probe timer pending
 *   4 - zero-window probe timer pending
 *   2 - sk->sk_timer pending
 *   0 - none of the above
 *
 * rx_queue is the accept backlog for a listening socket, otherwise
 * rcv_nxt - copied_seq clamped to be non-negative.  The final column is
 * the fastopen queue limit for a listening socket, otherwise -1 while in
 * initial slow start or snd_ssthresh after that.
 */
2219652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
22201da177e4SLinus Torvalds {
22211da177e4SLinus Torvalds 	int timer_active;
22221da177e4SLinus Torvalds 	unsigned long timer_expires;
2223cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2224cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2225cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
22260536fcc0SEric Dumazet 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2227c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2228c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2229c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2230c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
223149d09007SEric Dumazet 	int rx_queue;
223200fd38d9SEric Dumazet 	int state;
22331da177e4SLinus Torvalds 
22346ba8a3b1SNandita Dukkipati 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
223557dde7f7SYuchung Cheng 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
22366ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
22371da177e4SLinus Torvalds 		timer_active	= 1;
2238463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2239463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
22401da177e4SLinus Torvalds 		timer_active	= 4;
2241463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2242cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
22431da177e4SLinus Torvalds 		timer_active	= 2;
2244cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
22451da177e4SLinus Torvalds 	} else {
22461da177e4SLinus Torvalds 		timer_active	= 0;
		/* No timer: makes the printed remaining time come out as zero. */
22471da177e4SLinus Torvalds 		timer_expires = jiffies;
22481da177e4SLinus Torvalds 	}
22491da177e4SLinus Torvalds 
225000fd38d9SEric Dumazet 	state = sk_state_load(sk);
225100fd38d9SEric Dumazet 	if (state == TCP_LISTEN)
225249d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
225349d09007SEric Dumazet 	else
225400fd38d9SEric Dumazet 		/* Because we don't lock the socket,
225500fd38d9SEric Dumazet 		 * we might find a transient negative value.
225649d09007SEric Dumazet 		 */
225749d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
225849d09007SEric Dumazet 
22595e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2260652586dfSTetsuo Handa 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
226100fd38d9SEric Dumazet 		i, src, srcp, dest, destp, state,
226247da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
226349d09007SEric Dumazet 		rx_queue,
22641da177e4SLinus Torvalds 		timer_active,
2265a399a805SEric Dumazet 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2266463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2267a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
22686687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2269cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
227041c6d650SReshetova, Elena 		refcount_read(&sk->sk_refcnt), sk,
22717be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
22727be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2273463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
22741da177e4SLinus Torvalds 		tp->snd_cwnd,
227500fd38d9SEric Dumazet 		state == TCP_LISTEN ?
227600fd38d9SEric Dumazet 		    fastopenq->max_qlen :
2277652586dfSTetsuo Handa 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
22781da177e4SLinus Torvalds }
22791da177e4SLinus Torvalds 
/* Print one line for a time-wait socket into seq_file f; i is the entry
 * number shown in the first column.
 *
 * The state column is tw_substate, the timer column is always 3, the
 * remaining time is derived from tw_timer.expires, and the queue,
 * retransmit, uid, timeout and inode columns are printed as 0.
 */
2280cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2281652586dfSTetsuo Handa 			       struct seq_file *f, int i)
22821da177e4SLinus Torvalds {
2283789f558cSEric Dumazet 	long delta = tw->tw_timer.expires - jiffies;
228423f33c2dSAl Viro 	__be32 dest, src;
22851da177e4SLinus Torvalds 	__u16 destp, srcp;
22861da177e4SLinus Torvalds 
22871da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
22881da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
22891da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
22901da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
22911da177e4SLinus Torvalds 
22925e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2293652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
22941da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2295a399a805SEric Dumazet 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
229641c6d650SReshetova, Elena 		refcount_read(&tw->tw_refcnt), tw);
22971da177e4SLinus Torvalds }
22981da177e4SLinus Torvalds 
22991da177e4SLinus Torvalds #define TMPSZ 150
23001da177e4SLinus Torvalds 
23011da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
23021da177e4SLinus Torvalds {
23031da177e4SLinus Torvalds 	struct tcp_iter_state *st;
230405dbc7b5SEric Dumazet 	struct sock *sk = v;
23051da177e4SLinus Torvalds 
2306652586dfSTetsuo Handa 	seq_setwidth(seq, TMPSZ - 1);
23071da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
2308652586dfSTetsuo Handa 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
23091da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
23101da177e4SLinus Torvalds 			   "inode");
23111da177e4SLinus Torvalds 		goto out;
23121da177e4SLinus Torvalds 	}
23131da177e4SLinus Torvalds 	st = seq->private;
23141da177e4SLinus Torvalds 
231505dbc7b5SEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
2316652586dfSTetsuo Handa 		get_timewait4_sock(v, seq, st->num);
2317079096f1SEric Dumazet 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2318079096f1SEric Dumazet 		get_openreq4(v, seq, st->num);
231905dbc7b5SEric Dumazet 	else
2320652586dfSTetsuo Handa 		get_tcp4_sock(v, seq, st->num);
23211da177e4SLinus Torvalds out:
2322652586dfSTetsuo Handa 	seq_pad(seq, '\n');
23231da177e4SLinus Torvalds 	return 0;
23241da177e4SLinus Torvalds }
23251da177e4SLinus Torvalds 
/* File operations for the TCP proc file: tcp_seq_open() on open, the
 * generic seq_file read/llseek, and the net-aware seq_file release.
 */
232673cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
232773cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
232873cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
232973cb88ecSArjan van de Ven 	.read    = seq_read,
233073cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
233173cb88ecSArjan van de Ven 	.release = seq_release_net
233273cb88ecSArjan van de Ven };
233373cb88ecSArjan van de Ven 
/* Description of the IPv4 TCP proc file: entry name "tcp", family AF_INET.
 * Only .show is set here; tcp_proc_register() fills in start/next/stop.
 */
23341da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
23351da177e4SLinus Torvalds 	.name		= "tcp",
23361da177e4SLinus Torvalds 	.family		= AF_INET,
233773cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
23389427c4b3SDenis V. Lunev 	.seq_ops	= {
23399427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
23409427c4b3SDenis V. Lunev 	},
23411da177e4SLinus Torvalds };
23421da177e4SLinus Torvalds 
/* Per-namespace init: register the IPv4 TCP proc entry in @net. */
23432c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2344757764f6SPavel Emelyanov {
2345757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2346757764f6SPavel Emelyanov }
2347757764f6SPavel Emelyanov 
/* Per-namespace exit: remove the IPv4 TCP proc entry from @net. */
23482c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2349757764f6SPavel Emelyanov {
2350757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2351757764f6SPavel Emelyanov }
2352757764f6SPavel Emelyanov 
/* Per-network-namespace hooks that create and remove the TCP proc entry. */
2353757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2354757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2355757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2356757764f6SPavel Emelyanov };
2357757764f6SPavel Emelyanov 
/* Register tcp4_net_ops as a pernet subsystem; returns the result of
 * register_pernet_subsys().
 */
23581da177e4SLinus Torvalds int __init tcp4_proc_init(void)
23591da177e4SLinus Torvalds {
2360757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
23611da177e4SLinus Torvalds }
23621da177e4SLinus Torvalds 
/* Unregister the pernet subsystem registered by tcp4_proc_init(). */
23631da177e4SLinus Torvalds void tcp4_proc_exit(void)
23641da177e4SLinus Torvalds {
2365757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
23661da177e4SLinus Torvalds }
23671da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
23681da177e4SLinus Torvalds 
/* Protocol descriptor for TCP ("TCP"): socket-level entry points, hash
 * table hooks, memory-accounting pointers and sysctl limits, object
 * sizes, and the time-wait / request-sock operation tables.
 * Sockets are allocated from a SLAB_TYPESAFE_BY_RCU cache.  The compat
 * socket-option hooks are only present when CONFIG_COMPAT is enabled.
 */
23691da177e4SLinus Torvalds struct proto tcp_prot = {
23701da177e4SLinus Torvalds 	.name			= "TCP",
23711da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
23721da177e4SLinus Torvalds 	.close			= tcp_close,
23731da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
23741da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2375463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
23761da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
23771da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
23781da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
23791da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
23801da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
23811da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
23824b9d07a4SUrsula Braun 	.keepalive		= tcp_set_keepalive,
23831da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
23847ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
23857ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
23861da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
238746d3ceabSEric Dumazet 	.release_cb		= tcp_release_cb,
2388ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2389ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2390ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
23911da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
239206044751SEric Dumazet 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2393c9bee3b7SEric Dumazet 	.stream_memory_free	= tcp_stream_memory_free,
23941da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
23950a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
23961da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
23971da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
2398a4fe34bfSEric W. Biederman 	.sysctl_mem		= sysctl_tcp_mem,
23991da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
24001da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
24011da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
24021da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
24035f0d5a3aSPaul E. McKenney 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
24046d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
240560236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
240639d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
24077ba42910SChangli Gao 	.no_autobind		= true,
2408543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2409543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2410543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2411543d9cfeSArnaldo Carvalho de Melo #endif
2412c1e64e29SLorenzo Colitti 	.diag_destroy		= tcp_abort,
24131da177e4SLinus Torvalds };
24144bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
24151da177e4SLinus Torvalds 
2416046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2417046ee902SDenis V. Lunev {
2418bdbbb852SEric Dumazet 	int cpu;
2419bdbbb852SEric Dumazet 
2420bdbbb852SEric Dumazet 	for_each_possible_cpu(cpu)
2421bdbbb852SEric Dumazet 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2422bdbbb852SEric Dumazet 	free_percpu(net->ipv4.tcp_sk);
2423bdbbb852SEric Dumazet }
2424bdbbb852SEric Dumazet 
2425bdbbb852SEric Dumazet static int __net_init tcp_sk_init(struct net *net)
2426bdbbb852SEric Dumazet {
2427fee83d09SHaishuang Yan 	int res, cpu, cnt;
2428bdbbb852SEric Dumazet 
2429bdbbb852SEric Dumazet 	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2430bdbbb852SEric Dumazet 	if (!net->ipv4.tcp_sk)
2431bdbbb852SEric Dumazet 		return -ENOMEM;
2432bdbbb852SEric Dumazet 
2433bdbbb852SEric Dumazet 	for_each_possible_cpu(cpu) {
2434bdbbb852SEric Dumazet 		struct sock *sk;
2435bdbbb852SEric Dumazet 
2436bdbbb852SEric Dumazet 		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2437bdbbb852SEric Dumazet 					   IPPROTO_TCP, net);
2438bdbbb852SEric Dumazet 		if (res)
2439bdbbb852SEric Dumazet 			goto fail;
2440a9d6532bSEric Dumazet 		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2441bdbbb852SEric Dumazet 		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2442bdbbb852SEric Dumazet 	}
244349213555SDaniel Borkmann 
2444bdbbb852SEric Dumazet 	net->ipv4.sysctl_tcp_ecn = 2;
244549213555SDaniel Borkmann 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
244649213555SDaniel Borkmann 
2447b0f9ca53SFan Du 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
24486b58e0a5SFan Du 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
244905cbc0dbSFan Du 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2450bdbbb852SEric Dumazet 
245113b287e8SNikolay Borisov 	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
24529bd6861bSNikolay Borisov 	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2453b840d15dSNikolay Borisov 	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
245413b287e8SNikolay Borisov 
24556fa25166SNikolay Borisov 	net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
24567c083ecbSNikolay Borisov 	net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
24570aca737dSDavid S. Miller 	net->ipv4.sysctl_tcp_syncookies = 1;
24581043e25fSNikolay Borisov 	net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2459ae5c3f40SNikolay Borisov 	net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2460c6214a97SNikolay Borisov 	net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2461c402d9beSNikolay Borisov 	net->ipv4.sysctl_tcp_orphan_retries = 0;
24621e579caaSNikolay Borisov 	net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
24634979f2d9SNikolay Borisov 	net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
246456ab6b93SHaishuang Yan 	net->ipv4.sysctl_tcp_tw_reuse = 0;
246512ed8244SNikolay Borisov 
2466fee83d09SHaishuang Yan 	cnt = tcp_hashinfo.ehash_mask + 1;
2467fee83d09SHaishuang Yan 	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
24681946e672SHaishuang Yan 	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
24691946e672SHaishuang Yan 
2470fee83d09SHaishuang Yan 	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
2471f9301034SEric Dumazet 	net->ipv4.sysctl_tcp_sack = 1;
24729bb37ef0SEric Dumazet 	net->ipv4.sysctl_tcp_window_scaling = 1;
24735d2ed052SEric Dumazet 	net->ipv4.sysctl_tcp_timestamps = 1;
2474fee83d09SHaishuang Yan 
247549213555SDaniel Borkmann 	return 0;
2476bdbbb852SEric Dumazet fail:
2477bdbbb852SEric Dumazet 	tcp_sk_exit(net);
2478bdbbb852SEric Dumazet 
2479bdbbb852SEric Dumazet 	return res;
2480b099ce26SEric W. Biederman }
2481b099ce26SEric W. Biederman 
2482b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2483b099ce26SEric W. Biederman {
24841946e672SHaishuang Yan 	inet_twsk_purge(&tcp_hashinfo, AF_INET);
2485046ee902SDenis V. Lunev }
2486046ee902SDenis V. Lunev 
2487046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2488046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2489046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2490b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2491046ee902SDenis V. Lunev };
2492046ee902SDenis V. Lunev 
24939b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
24941da177e4SLinus Torvalds {
24956a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
24961da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
24971da177e4SLinus Torvalds }
2498