xref: /linux/net/ipv4/tcp_ipv4.c (revision e62a123b8ef7c5dc4db2c16383d506860ad21b47)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
541da177e4SLinus Torvalds 
55eb4dea58SHerbert Xu #include <linux/bottom_half.h>
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
645a0e3ad6STejun Heo #include <linux/slab.h>
651da177e4SLinus Torvalds 
66457c4cbcSEric W. Biederman #include <net/net_namespace.h>
671da177e4SLinus Torvalds #include <net/icmp.h>
68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
691da177e4SLinus Torvalds #include <net/tcp.h>
7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
711da177e4SLinus Torvalds #include <net/ipv6.h>
721da177e4SLinus Torvalds #include <net/inet_common.h>
736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
741da177e4SLinus Torvalds #include <net/xfrm.h>
756e5714eaSDavid S. Miller #include <net/secure_seq.h>
76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h>
77076bb0c8SEliezer Tamir #include <net/busy_poll.h>
781da177e4SLinus Torvalds 
791da177e4SLinus Torvalds #include <linux/inet.h>
801da177e4SLinus Torvalds #include <linux/ipv6.h>
811da177e4SLinus Torvalds #include <linux/stddef.h>
821da177e4SLinus Torvalds #include <linux/proc_fs.h>
831da177e4SLinus Torvalds #include <linux/seq_file.h>
841da177e4SLinus Torvalds 
85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
87cfb6eeb4SYOSHIFUJI Hideaki 
/* Consulted by tcp_twsk_unique(): when that function is called with a
 * non-NULL twp, a TIME_WAIT socket is only handed over for reuse if this
 * value is non-zero (and the bucket's last timestamp is old enough).
 */
88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
/* NOTE(review): not read anywhere in this part of the file; exported for
 * users elsewhere - confirm its exact meaning against those users.
 */
89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
911da177e4SLinus Torvalds 
92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
/* Forward declaration, only present when CONFIG_TCP_MD5SIG is set. */
93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
94318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
95cfb6eeb4SYOSHIFUJI Hideaki #endif
96cfb6eeb4SYOSHIFUJI Hideaki 
/* Hash tables used for TCP socket lookup; tcp_v4_err() passes this to
 * __inet_lookup_established() to find the socket an ICMP error refers to.
 */
975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
991da177e4SLinus Torvalds 
100936b8bdbSOctavian Purdila static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1011da177e4SLinus Torvalds {
102eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
103eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
104aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
105aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1061da177e4SLinus Torvalds }
1071da177e4SLinus Torvalds 
1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1096d6ee43eSArnaldo Carvalho de Melo {
1106d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1116d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1126d6ee43eSArnaldo Carvalho de Melo 
1136d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1146d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1156d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1166d6ee43eSArnaldo Carvalho de Melo 
1176d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1186d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1196d6ee43eSArnaldo Carvalho de Melo 	   holder.
1206d6ee43eSArnaldo Carvalho de Melo 
1216d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1226d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1236d6ee43eSArnaldo Carvalho de Melo 	 */
1246d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
12551456b29SIan Morris 	    (!twp || (sysctl_tcp_tw_reuse &&
1269d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1276d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1286d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1296d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1306d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1326d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1336d6ee43eSArnaldo Carvalho de Melo 		return 1;
1346d6ee43eSArnaldo Carvalho de Melo 	}
1356d6ee43eSArnaldo Carvalho de Melo 
1366d6ee43eSArnaldo Carvalho de Melo 	return 0;
1376d6ee43eSArnaldo Carvalho de Melo }
1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1396d6ee43eSArnaldo Carvalho de Melo 
1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
/*
 * tcp_v4_connect - start an active open on an IPv4 TCP socket
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as a struct sockaddr_in
 * @addr_len: size of @uaddr in bytes
 *
 * Steps, in order: validate the address, route to the destination (or to
 * the first hop when a source-route option is set), fill in the socket's
 * source/destination identity, move to SYN-SENT, hash the socket via
 * inet_hash_connect(), refresh the route for the final port pair, pick
 * an initial sequence number if none is set, and call tcp_connect().
 *
 * Returns 0 on success or a negative errno: -EINVAL for a short address
 * or a zero destination with source routing, -EAFNOSUPPORT for a
 * non-AF_INET family, -ENETUNREACH for a multicast/broadcast route, or
 * whatever the routing, hashing or tcp_connect() step reported.
 */
1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1421da177e4SLinus Torvalds {
1432d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1441da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1451da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
146dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
147bada8adcSAl Viro 	__be32 daddr, nexthop;
148da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1492d7192d6SDavid S. Miller 	struct rtable *rt;
1501da177e4SLinus Torvalds 	int err;
151f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1521da177e4SLinus Torvalds 
	/* Reject short or non-IPv4 addresses before touching any state. */
1531da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1541da177e4SLinus Torvalds 		return -EINVAL;
1551da177e4SLinus Torvalds 
1561da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1571da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1581da177e4SLinus Torvalds 
1591da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
	/* The condition given to rcu_dereference_protected() documents that
	 * the caller is expected to own the socket here.
	 */
160f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
161f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
	/* With a source-route option the route lookup targets opt.faddr,
	 * while daddr stays the destination supplied by the user.
	 */
162f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1631da177e4SLinus Torvalds 		if (!daddr)
1641da177e4SLinus Torvalds 			return -EINVAL;
165f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1661da177e4SLinus Torvalds 	}
1671da177e4SLinus Torvalds 
	/* Remember the ports used for this first lookup; they are passed to
	 * ip_route_newports() once inet_hash_connect() has run.
	 */
168dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
169dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
170da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
171da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1721da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1731da177e4SLinus Torvalds 			      IPPROTO_TCP,
1740e0d44abSSteffen Klassert 			      orig_sport, orig_dport, sk);
175b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
176b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
		/* Only "network unreachable" is counted as a no-route event. */
177b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
178f1d8cba6SEric Dumazet 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179b23dd4feSDavid S. Miller 		return err;
180584bdf8cSWei Dong 	}
1811da177e4SLinus Torvalds 
	/* Connecting over a multicast or broadcast route is refused. */
1821da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1831da177e4SLinus Torvalds 		ip_rt_put(rt);
1841da177e4SLinus Torvalds 		return -ENETUNREACH;
1851da177e4SLinus Torvalds 	}
1861da177e4SLinus Torvalds 
	/* Without source routing, adopt the destination stored in the flow
	 * by the route lookup.
	 */
187f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
188da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1891da177e4SLinus Torvalds 
	/* No source address set on the socket yet: take the flow's. */
190c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
191da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
192d1e559d0SEric Dumazet 	sk_rcv_saddr_set(sk, inet->inet_saddr);
1931da177e4SLinus Torvalds 
	/* Timestamp state is present but the destination differs from the
	 * socket's previous inet_daddr: drop that state, and the sequence
	 * number too unless tp->repair is set.
	 */
194c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1951da177e4SLinus Torvalds 		/* Reset inherited state */
1961da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
1971da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
198ee995283SPavel Emelyanov 		if (likely(!tp->repair))
1991da177e4SLinus Torvalds 			tp->write_seq	   = 0;
2001da177e4SLinus Torvalds 	}
2011da177e4SLinus Torvalds 
	/* With tw_recycle enabled and no timestamp state on the socket,
	 * fetch a timewait stamp for this route; skipped when source
	 * routing made daddr differ from the routed address.
	 */
202295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
20381166dd6SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
20481166dd6SDavid S. Miller 		tcp_fetch_timewait_stamp(sk, &rt->dst);
2051da177e4SLinus Torvalds 
206c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
207d1e559d0SEric Dumazet 	sk_daddr_set(sk, daddr);
2081da177e4SLinus Torvalds 
	/* Extension header length is the IP options length, or 0 if none. */
209d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
210f6d8bd05SEric Dumazet 	if (inet_opt)
211f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2121da177e4SLinus Torvalds 
213bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2141da177e4SLinus Torvalds 
2151da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2161da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2171da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2181da177e4SLinus Torvalds 	 * complete initialization after this.
2191da177e4SLinus Torvalds 	 */
2201da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
221a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2221da177e4SLinus Torvalds 	if (err)
2231da177e4SLinus Torvalds 		goto failure;
2241da177e4SLinus Torvalds 
225877d1f62STom Herbert 	sk_set_txhash(sk);
2269e7ceb06SSathya Perla 
	/* Redo the route for the now-final port pair.  On error rt holds
	 * an ERR_PTR, so it is cleared before jumping to the cleanup code
	 * that calls ip_rt_put(rt).
	 */
227da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
229b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
230b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
231b23dd4feSDavid S. Miller 		rt = NULL;
2321da177e4SLinus Torvalds 		goto failure;
233b23dd4feSDavid S. Miller 	}
2341da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
235bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
236d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2371da177e4SLinus Torvalds 
	/* Choose an initial sequence number only if none is set (e.g. by
	 * tcp_twsk_unique()) and tp->repair is clear.
	 */
238ee995283SPavel Emelyanov 	if (!tp->write_seq && likely(!tp->repair))
239c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240c720c7e8SEric Dumazet 							   inet->inet_daddr,
241c720c7e8SEric Dumazet 							   inet->inet_sport,
2421da177e4SLinus Torvalds 							   usin->sin_port);
2431da177e4SLinus Torvalds 
244c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 	err = tcp_connect(sk);
247ee995283SPavel Emelyanov 
	/* Cleared so the failure path passes NULL to ip_rt_put();
	 * presumably the route now belongs to the socket after
	 * sk_setup_caps() above - confirm.
	 */
2481da177e4SLinus Torvalds 	rt = NULL;
2491da177e4SLinus Torvalds 	if (err)
2501da177e4SLinus Torvalds 		goto failure;
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds 	return 0;
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds failure:
2557174259eSArnaldo Carvalho de Melo 	/*
2567174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2577174259eSArnaldo Carvalho de Melo 	 * if necessary.
2587174259eSArnaldo Carvalho de Melo 	 */
2591da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2601da177e4SLinus Torvalds 	ip_rt_put(rt);
	/* Undo the destination-related state set earlier in this function. */
2611da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
262c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2631da177e4SLinus Torvalds 	return err;
2641da177e4SLinus Torvalds }
2654bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2661da177e4SLinus Torvalds 
2671da177e4SLinus Torvalds /*
268563d34d0SEric Dumazet  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
269563d34d0SEric Dumazet  * It can be called through tcp_release_cb() if socket was owned by user
270563d34d0SEric Dumazet  * at the time tcp_v4_err() was called to handle ICMP message.
2711da177e4SLinus Torvalds  */
2724fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk)
2731da177e4SLinus Torvalds {
2741da177e4SLinus Torvalds 	struct dst_entry *dst;
2751da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
276563d34d0SEric Dumazet 	u32 mtu = tcp_sk(sk)->mtu_info;
2771da177e4SLinus Torvalds 
27880d0a69fSDavid S. Miller 	dst = inet_csk_update_pmtu(sk, mtu);
27980d0a69fSDavid S. Miller 	if (!dst)
2801da177e4SLinus Torvalds 		return;
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
2831da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
2841da177e4SLinus Torvalds 	 */
2851da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
2861da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
2891da177e4SLinus Torvalds 
2901da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
291482fc609SHannes Frederic Sowa 	    ip_sk_accept_pmtu(sk) &&
292d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
2931da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
2941da177e4SLinus Torvalds 
2951da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
2961da177e4SLinus Torvalds 		 * clear that the old packet has been
2971da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
2981da177e4SLinus Torvalds 		 * discovery.
2991da177e4SLinus Torvalds 		 */
3001da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3011da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3021da177e4SLinus Torvalds }
3034fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced);
3041da177e4SLinus Torvalds 
30555be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk)
30655be7a9cSDavid S. Miller {
30755be7a9cSDavid S. Miller 	struct dst_entry *dst = __sk_dst_check(sk, 0);
30855be7a9cSDavid S. Miller 
3091ed5c48fSDavid S. Miller 	if (dst)
3106700c270SDavid S. Miller 		dst->ops->redirect(dst, sk, skb);
31155be7a9cSDavid S. Miller }
31255be7a9cSDavid S. Miller 
31326e37360SEric Dumazet 
31426e37360SEric Dumazet /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
31526e37360SEric Dumazet void tcp_req_err(struct sock *sk, u32 seq)
31626e37360SEric Dumazet {
31726e37360SEric Dumazet 	struct request_sock *req = inet_reqsk(sk);
31826e37360SEric Dumazet 	struct net *net = sock_net(sk);
31926e37360SEric Dumazet 
32026e37360SEric Dumazet 	/* ICMPs are not backlogged, hence we cannot get
32126e37360SEric Dumazet 	 * an established socket here.
32226e37360SEric Dumazet 	 */
32326e37360SEric Dumazet 	WARN_ON(req->sk);
32426e37360SEric Dumazet 
32526e37360SEric Dumazet 	if (seq != tcp_rsk(req)->snt_isn) {
32626e37360SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
32726e37360SEric Dumazet 	} else {
32826e37360SEric Dumazet 		/*
32926e37360SEric Dumazet 		 * Still in SYN_RECV, just remove it silently.
33026e37360SEric Dumazet 		 * There is no good way to pass the error to the newly
33126e37360SEric Dumazet 		 * created socket, and POSIX does not want network
33226e37360SEric Dumazet 		 * errors returned from accept().
33326e37360SEric Dumazet 		 */
334c6973669SFan Du 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
335ef84d8ceSEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
33626e37360SEric Dumazet 	}
337ef84d8ceSEric Dumazet 	reqsk_put(req);
33826e37360SEric Dumazet }
33926e37360SEric Dumazet EXPORT_SYMBOL(tcp_req_err);
34026e37360SEric Dumazet 
3411da177e4SLinus Torvalds /*
3421da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3431da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3441da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3451da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3461da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3471da177e4SLinus Torvalds  * to find the appropriate port.
3481da177e4SLinus Torvalds  *
3491da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3501da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3511da177e4SLinus Torvalds  * and for some paths there is no check at all.
3521da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3531da177e4SLinus Torvalds  * is probably better.
3541da177e4SLinus Torvalds  *
3551da177e4SLinus Torvalds  */
3561da177e4SLinus Torvalds 
/*
 * tcp_v4_err - process an ICMP error that quotes one of our TCP segments
 * @icmp_skb: the ICMP packet; ->data points at the quoted IP header, and
 *            the quoted TCP header follows it at ihl * 4 bytes
 * @info:     only used for ICMP_FRAG_NEEDED, where it is stored in the
 *            socket's mtu_info
 *
 * Finds the matching socket, filters out errors that cannot belong to
 * in-flight data, then either acts on the message (redirect, PMTU, RTO
 * backoff revert) or records it as a hard (sk_err) or soft (sk_err_soft)
 * error depending on socket state and ownership.
 */
3574d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3581da177e4SLinus Torvalds {
359b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3604d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
361f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3621da177e4SLinus Torvalds 	struct tcp_sock *tp;
3631da177e4SLinus Torvalds 	struct inet_sock *inet;
3644d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3654d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3661da177e4SLinus Torvalds 	struct sock *sk;
367f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3680a672f74SYuchung Cheng 	struct request_sock *fastopen;
3690a672f74SYuchung Cheng 	__u32 seq, snd_una;
370f1ecd5d9SDamian Lukowski 	__u32 remaining;
3711da177e4SLinus Torvalds 	int err;
3724d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3731da177e4SLinus Torvalds 
	/* Look the socket up using the addresses and ports of the quoted
	 * headers; no match is counted as an ICMP input error.
	 */
37426e37360SEric Dumazet 	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
37526e37360SEric Dumazet 				       th->dest, iph->saddr, ntohs(th->source),
37626e37360SEric Dumazet 				       inet_iif(icmp_skb));
3771da177e4SLinus Torvalds 	if (!sk) {
378dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3791da177e4SLinus Torvalds 		return;
3801da177e4SLinus Torvalds 	}
	/* Nothing is done for TIME_WAIT sockets beyond releasing them. */
3811da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3829469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3831da177e4SLinus Torvalds 		return;
3841da177e4SLinus Torvalds 	}
38526e37360SEric Dumazet 	seq = ntohl(th->seq);
	/* Request sockets are handled (and released) by tcp_req_err(). */
38626e37360SEric Dumazet 	if (sk->sk_state == TCP_NEW_SYN_RECV)
38726e37360SEric Dumazet 		return tcp_req_err(sk, seq);
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	bh_lock_sock(sk);
3901da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3911da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
392563d34d0SEric Dumazet 	 * We do take care of PMTU discovery (RFC1191) special case :
393563d34d0SEric Dumazet 	 * we can receive locally generated ICMP messages while socket is held.
3941da177e4SLinus Torvalds 	 */
	/* Note this only bumps a counter (for everything except
	 * FRAG_NEEDED); processing continues, and the paths below re-check
	 * sock_owned_by_user() where they need to.
	 */
395b74aa930SEric Dumazet 	if (sock_owned_by_user(sk)) {
396b74aa930SEric Dumazet 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
397de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
398b74aa930SEric Dumazet 	}
3991da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
4001da177e4SLinus Torvalds 		goto out;
4011da177e4SLinus Torvalds 
	/* Drop ICMPs whose quoted TTL is below the socket's min_ttl. */
40297e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
40397e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
40497e3ecd1Sstephen hemminger 		goto out;
40597e3ecd1Sstephen hemminger 	}
40697e3ecd1Sstephen hemminger 
407f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
4081da177e4SLinus Torvalds 	tp = tcp_sk(sk);
4090a672f74SYuchung Cheng 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
4100a672f74SYuchung Cheng 	fastopen = tp->fastopen_rsk;
4110a672f74SYuchung Cheng 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Except for listeners, the quoted sequence number must lie within
	 * [snd_una, snd_nxt]; otherwise count it and ignore the ICMP.
	 */
4121da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
4130a672f74SYuchung Cheng 	    !between(seq, snd_una, tp->snd_nxt)) {
414de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4151da177e4SLinus Torvalds 		goto out;
4161da177e4SLinus Torvalds 	}
4171da177e4SLinus Torvalds 
	/* First stage: act on the ICMP type.  Cases that "break" have set
	 * err and fall through to the error reporting below; the others
	 * are finished and jump to out.
	 */
4181da177e4SLinus Torvalds 	switch (type) {
41955be7a9cSDavid S. Miller 	case ICMP_REDIRECT:
42055be7a9cSDavid S. Miller 		do_redirect(icmp_skb, sk);
42155be7a9cSDavid S. Miller 		goto out;
4221da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
4231da177e4SLinus Torvalds 		/* Just silently ignore these. */
4241da177e4SLinus Torvalds 		goto out;
4251da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4261da177e4SLinus Torvalds 		err = EPROTO;
4271da177e4SLinus Torvalds 		break;
4281da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
		/* Codes above NR_ICMP_UNREACH are ignored; this also keeps
		 * the icmp_err_convert[] lookup below bounded by that value.
		 */
4291da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4301da177e4SLinus Torvalds 			goto out;
4311da177e4SLinus Torvalds 
4321da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4330d4f0608SEric Dumazet 			/* We are not interested in TCP_LISTEN and open_requests
4340d4f0608SEric Dumazet 			 * (SYN-ACKs send out by Linux are always <576bytes so
4350d4f0608SEric Dumazet 			 * they should go through unfragmented).
4360d4f0608SEric Dumazet 			 */
4370d4f0608SEric Dumazet 			if (sk->sk_state == TCP_LISTEN)
4380d4f0608SEric Dumazet 				goto out;
4390d4f0608SEric Dumazet 
			/* Record the reported MTU, then either handle it now
			 * or, if the user owns the socket, set the deferred
			 * flag.  A socket reference is taken only when the
			 * flag was not already set.
			 */
440563d34d0SEric Dumazet 			tp->mtu_info = info;
441144d56e9SEric Dumazet 			if (!sock_owned_by_user(sk)) {
442563d34d0SEric Dumazet 				tcp_v4_mtu_reduced(sk);
443144d56e9SEric Dumazet 			} else {
444144d56e9SEric Dumazet 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
445144d56e9SEric Dumazet 					sock_hold(sk);
446144d56e9SEric Dumazet 			}
4471da177e4SLinus Torvalds 			goto out;
4481da177e4SLinus Torvalds 		}
4491da177e4SLinus Torvalds 
4501da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
451f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
452f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
453f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
454f1ecd5d9SDamian Lukowski 			break;
		/* Revert only if the ICMP quotes the oldest unacknowledged
		 * sequence, a backed-off retransmission is in progress, and
		 * this is not a fast open socket.
		 */
455f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
4560a672f74SYuchung Cheng 		    !icsk->icsk_backoff || fastopen)
457f1ecd5d9SDamian Lukowski 			break;
458f1ecd5d9SDamian Lukowski 
		/* Timer state is not modified while the user owns the socket. */
4598f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4608f49c270SDavid S. Miller 			break;
4618f49c270SDavid S. Miller 
		/* Undo one backoff step and recompute the RTO from it. */
462f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
463fcdd1cf4SEric Dumazet 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
464fcdd1cf4SEric Dumazet 					       TCP_TIMEOUT_INIT;
465fcdd1cf4SEric Dumazet 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
466f1ecd5d9SDamian Lukowski 
467f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
468f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
469f1ecd5d9SDamian Lukowski 
		/* Time left on the new RTO, counted from the timestamp of the
		 * head of the write queue; the min() clamps it at zero.
		 */
4707faee5c0SEric Dumazet 		remaining = icsk->icsk_rto -
4717faee5c0SEric Dumazet 			    min(icsk->icsk_rto,
4727faee5c0SEric Dumazet 				tcp_time_stamp - tcp_skb_timestamp(skb));
473f1ecd5d9SDamian Lukowski 
474f1ecd5d9SDamian Lukowski 		if (remaining) {
475f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
476f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
477f1ecd5d9SDamian Lukowski 		} else {
478f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
479f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
480f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
481f1ecd5d9SDamian Lukowski 		}
482f1ecd5d9SDamian Lukowski 
4831da177e4SLinus Torvalds 		break;
4841da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4851da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4861da177e4SLinus Torvalds 		break;
4871da177e4SLinus Torvalds 	default:
4881da177e4SLinus Torvalds 		goto out;
4891da177e4SLinus Torvalds 	}
4901da177e4SLinus Torvalds 
	/* Second stage: report err.  Connections still being set up are
	 * aborted with a hard error when the socket is not user-owned;
	 * all other states fall through to the RFC1122 handling below.
	 */
4911da177e4SLinus Torvalds 	switch (sk->sk_state) {
4921da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4930a672f74SYuchung Cheng 	case TCP_SYN_RECV:
4940a672f74SYuchung Cheng 		/* Only in fast or simultaneous open. If a fast open socket is
4950a672f74SYuchung Cheng 		 * already accepted it is treated as a connected one below.
4961da177e4SLinus Torvalds 		 */
49751456b29SIan Morris 		if (fastopen && !fastopen->sk)
4980a672f74SYuchung Cheng 			break;
4990a672f74SYuchung Cheng 
5001da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
5011da177e4SLinus Torvalds 			sk->sk_err = err;
5021da177e4SLinus Torvalds 
5031da177e4SLinus Torvalds 			sk->sk_error_report(sk);
5041da177e4SLinus Torvalds 
5051da177e4SLinus Torvalds 			tcp_done(sk);
5061da177e4SLinus Torvalds 		} else {
5071da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5081da177e4SLinus Torvalds 		}
5091da177e4SLinus Torvalds 		goto out;
5101da177e4SLinus Torvalds 	}
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5131da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5141da177e4SLinus Torvalds 	 *
5151da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5161da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5171da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5181da177e4SLinus Torvalds 	 *
5191da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5201da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5211da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5221da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5231da177e4SLinus Torvalds 	 *
5241da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5251da177e4SLinus Torvalds 	 *							--ANK (980905)
5261da177e4SLinus Torvalds 	 */
5271da177e4SLinus Torvalds 
	/* The error is reported immediately only when the socket has recverr
	 * set and is not user-owned; otherwise it is kept as a soft error.
	 */
5281da177e4SLinus Torvalds 	inet = inet_sk(sk);
5291da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5301da177e4SLinus Torvalds 		sk->sk_err = err;
5311da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5321da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5331da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5341da177e4SLinus Torvalds 	}
5351da177e4SLinus Torvalds 
5361da177e4SLinus Torvalds out:
	/* Common exit for every path that took bh_lock_sock(): unlock and
	 * release the socket.
	 */
5371da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5381da177e4SLinus Torvalds 	sock_put(sk);
5391da177e4SLinus Torvalds }
5401da177e4SLinus Torvalds 
54128850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
5421da177e4SLinus Torvalds {
543aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5441da177e4SLinus Torvalds 
54584fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
546419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
547663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
548ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5491da177e4SLinus Torvalds 	} else {
550419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
55107f0757aSJoe Perches 					 csum_partial(th,
5521da177e4SLinus Torvalds 						      th->doff << 2,
5531da177e4SLinus Torvalds 						      skb->csum));
5541da177e4SLinus Torvalds 	}
5551da177e4SLinus Torvalds }
5561da177e4SLinus Torvalds 
557419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
558bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
559419f9f89SHerbert Xu {
560cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
561419f9f89SHerbert Xu 
562419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
563419f9f89SHerbert Xu }
5644bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
565419f9f89SHerbert Xu 
5661da177e4SLinus Torvalds /*
5671da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5681da177e4SLinus Torvalds  *
5691da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5701da177e4SLinus Torvalds  *		      for reset.
5711da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5721da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5731da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5741da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5751da177e4SLinus Torvalds  *		arrived with segment.
5761da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5771da177e4SLinus Torvalds  */
5781da177e4SLinus Torvalds 
579a00e7444SEric Dumazet static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
5801da177e4SLinus Torvalds {
581cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
582cfb6eeb4SYOSHIFUJI Hideaki 	struct {
583cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
584cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
585714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
586cfb6eeb4SYOSHIFUJI Hideaki #endif
587cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
5881da177e4SLinus Torvalds 	struct ip_reply_arg arg;
589cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
590e46787f0SFlorian Westphal 	struct tcp_md5sig_key *key = NULL;
591658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
592658ddaafSShawn Lu 	unsigned char newhash[16];
593658ddaafSShawn Lu 	int genhash;
594658ddaafSShawn Lu 	struct sock *sk1 = NULL;
595cfb6eeb4SYOSHIFUJI Hideaki #endif
596a86b1e30SPavel Emelyanov 	struct net *net;
5971da177e4SLinus Torvalds 
5981da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
5991da177e4SLinus Torvalds 	if (th->rst)
6001da177e4SLinus Torvalds 		return;
6011da177e4SLinus Torvalds 
602c3658e8dSEric Dumazet 	/* If sk not NULL, it means we did a successful lookup and incoming
603c3658e8dSEric Dumazet 	 * route had to be correct. prequeue might have dropped our dst.
604c3658e8dSEric Dumazet 	 */
605c3658e8dSEric Dumazet 	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
6061da177e4SLinus Torvalds 		return;
6071da177e4SLinus Torvalds 
6081da177e4SLinus Torvalds 	/* Swap the send and the receive. */
609cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
610cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
611cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
612cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
613cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6141da177e4SLinus Torvalds 
6151da177e4SLinus Torvalds 	if (th->ack) {
616cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6171da177e4SLinus Torvalds 	} else {
618cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
619cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6201da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6211da177e4SLinus Torvalds 	}
6221da177e4SLinus Torvalds 
6237174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
624cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
625cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
626cfb6eeb4SYOSHIFUJI Hideaki 
6270f85feaeSEric Dumazet 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
628cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
629658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
630271c3b9bSFlorian Westphal 	if (sk && sk_fullsock(sk)) {
631e46787f0SFlorian Westphal 		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
632e46787f0SFlorian Westphal 					&ip_hdr(skb)->saddr, AF_INET);
633e46787f0SFlorian Westphal 	} else if (hash_location) {
634658ddaafSShawn Lu 		/*
635658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
636658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
637658ddaafSShawn Lu 		 * we are not loose security here:
638658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
639658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
640658ddaafSShawn Lu 		 */
6410f85feaeSEric Dumazet 		sk1 = __inet_lookup_listener(net,
642da5e3630STom Herbert 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
643da5e3630STom Herbert 					     th->source, ip_hdr(skb)->daddr,
644658ddaafSShawn Lu 					     ntohs(th->source), inet_iif(skb));
645658ddaafSShawn Lu 		/* don't send rst if it can't find key */
646658ddaafSShawn Lu 		if (!sk1)
647658ddaafSShawn Lu 			return;
648658ddaafSShawn Lu 		rcu_read_lock();
649658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
650658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
651658ddaafSShawn Lu 		if (!key)
652658ddaafSShawn Lu 			goto release_sk1;
653658ddaafSShawn Lu 
65439f8e58eSEric Dumazet 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
655658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
656658ddaafSShawn Lu 			goto release_sk1;
657658ddaafSShawn Lu 	}
658658ddaafSShawn Lu 
659cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
660cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
661cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
662cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
663cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
664cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
665cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
666cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
667cfb6eeb4SYOSHIFUJI Hideaki 
66849a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
66978e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
67078e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
671cfb6eeb4SYOSHIFUJI Hideaki 	}
672cfb6eeb4SYOSHIFUJI Hideaki #endif
673eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
674eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
67552cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
6761da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
677271c3b9bSFlorian Westphal 	arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
678271c3b9bSFlorian Westphal 
679e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
6804c675258SAlexey Kuznetsov 	 * routing might fail in this case. No choice here, if we choose to force
6814c675258SAlexey Kuznetsov 	 * input interface, we will misroute in case of asymmetric route.
682e2446eaaSShawn Lu 	 */
6834c675258SAlexey Kuznetsov 	if (sk)
6844c675258SAlexey Kuznetsov 		arg.bound_dev_if = sk->sk_bound_dev_if;
6851da177e4SLinus Torvalds 
686271c3b9bSFlorian Westphal 	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
687271c3b9bSFlorian Westphal 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
688271c3b9bSFlorian Westphal 
68966b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
690bdbbb852SEric Dumazet 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
691bdbbb852SEric Dumazet 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
69224a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
69324a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
6941da177e4SLinus Torvalds 
69563231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
69663231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
697658ddaafSShawn Lu 
698658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
699658ddaafSShawn Lu release_sk1:
700658ddaafSShawn Lu 	if (sk1) {
701658ddaafSShawn Lu 		rcu_read_unlock();
702658ddaafSShawn Lu 		sock_put(sk1);
703658ddaafSShawn Lu 	}
704658ddaafSShawn Lu #endif
7051da177e4SLinus Torvalds }
7061da177e4SLinus Torvalds 
7071da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7081da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7091da177e4SLinus Torvalds  */
7101da177e4SLinus Torvalds 
711*e62a123bSEric Dumazet static void tcp_v4_send_ack(struct net *net,
712*e62a123bSEric Dumazet 			    struct sk_buff *skb, u32 seq, u32 ack,
713ee684b6fSAndrey Vagin 			    u32 win, u32 tsval, u32 tsecr, int oif,
71488ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
71566b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7161da177e4SLinus Torvalds {
717cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7181da177e4SLinus Torvalds 	struct {
7191da177e4SLinus Torvalds 		struct tcphdr th;
720714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
721cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
722cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
723cfb6eeb4SYOSHIFUJI Hideaki #endif
724cfb6eeb4SYOSHIFUJI Hideaki 			];
7251da177e4SLinus Torvalds 	} rep;
7261da177e4SLinus Torvalds 	struct ip_reply_arg arg;
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7297174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7301da177e4SLinus Torvalds 
7311da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7321da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
733ee684b6fSAndrey Vagin 	if (tsecr) {
734cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7351da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7361da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
737ee684b6fSAndrey Vagin 		rep.opt[1] = htonl(tsval);
738ee684b6fSAndrey Vagin 		rep.opt[2] = htonl(tsecr);
739cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7401da177e4SLinus Torvalds 	}
7411da177e4SLinus Torvalds 
7421da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7431da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7441da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7451da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7461da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7471da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7481da177e4SLinus Torvalds 	rep.th.ack     = 1;
7491da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7501da177e4SLinus Torvalds 
751cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
752cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
753ee684b6fSAndrey Vagin 		int offset = (tsecr) ? 3 : 0;
754cfb6eeb4SYOSHIFUJI Hideaki 
755cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
756cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
757cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
758cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
759cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
760cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
761cfb6eeb4SYOSHIFUJI Hideaki 
76249a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
76390b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
76490b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
765cfb6eeb4SYOSHIFUJI Hideaki 	}
766cfb6eeb4SYOSHIFUJI Hideaki #endif
76788ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
768eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
769eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7701da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7711da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7729501f972SYOSHIFUJI Hideaki 	if (oif)
7739501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
77466b13d99SEric Dumazet 	arg.tos = tos;
775bdbbb852SEric Dumazet 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
776bdbbb852SEric Dumazet 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
77724a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
77824a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
7791da177e4SLinus Torvalds 
78063231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
7811da177e4SLinus Torvalds }
7821da177e4SLinus Torvalds 
7831da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7841da177e4SLinus Torvalds {
7858feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
786cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7871da177e4SLinus Torvalds 
788*e62a123bSEric Dumazet 	tcp_v4_send_ack(sock_net(sk), skb,
789*e62a123bSEric Dumazet 			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7907174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
791ee684b6fSAndrey Vagin 			tcp_time_stamp + tcptw->tw_ts_offset,
7929501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
7939501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
79488ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
79566b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
79666b13d99SEric Dumazet 			tw->tw_tos
7979501f972SYOSHIFUJI Hideaki 			);
7981da177e4SLinus Torvalds 
7998feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
8001da177e4SLinus Torvalds }
8011da177e4SLinus Torvalds 
802a00e7444SEric Dumazet static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
8037174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
8041da177e4SLinus Torvalds {
805168a8f58SJerry Chu 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
806168a8f58SJerry Chu 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
807168a8f58SJerry Chu 	 */
808*e62a123bSEric Dumazet 	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
809*e62a123bSEric Dumazet 					     tcp_sk(sk)->snd_nxt;
810*e62a123bSEric Dumazet 
811*e62a123bSEric Dumazet 	tcp_v4_send_ack(sock_net(sk), skb, seq,
812ed53d0abSEric Dumazet 			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
813ee684b6fSAndrey Vagin 			tcp_time_stamp,
8149501f972SYOSHIFUJI Hideaki 			req->ts_recent,
8159501f972SYOSHIFUJI Hideaki 			0,
816a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
817a915da9bSEric Dumazet 					  AF_INET),
81866b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
81966b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8201da177e4SLinus Torvalds }
8211da177e4SLinus Torvalds 
8221da177e4SLinus Torvalds /*
8239bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
82460236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8251da177e4SLinus Torvalds  *	socket.
8261da177e4SLinus Torvalds  */
8270f935dbeSEric Dumazet static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
828d6274bd8SOctavian Purdila 			      struct flowi *fl,
829e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
830ca6fb065SEric Dumazet 			      struct tcp_fastopen_cookie *foc,
831ca6fb065SEric Dumazet 				  bool attach_req)
8321da177e4SLinus Torvalds {
8332e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8346bd023f3SDavid S. Miller 	struct flowi4 fl4;
8351da177e4SLinus Torvalds 	int err = -1;
8361da177e4SLinus Torvalds 	struct sk_buff *skb;
8371da177e4SLinus Torvalds 
8381da177e4SLinus Torvalds 	/* First, grab a route. */
839ba3f7f04SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
840fd80eb94SDenis V. Lunev 		return -1;
8411da177e4SLinus Torvalds 
842ca6fb065SEric Dumazet 	skb = tcp_make_synack(sk, dst, req, foc, attach_req);
8431da177e4SLinus Torvalds 
8441da177e4SLinus Torvalds 	if (skb) {
845634fb979SEric Dumazet 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
8461da177e4SLinus Torvalds 
847634fb979SEric Dumazet 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
848634fb979SEric Dumazet 					    ireq->ir_rmt_addr,
8492e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
850b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
8511da177e4SLinus Torvalds 	}
8521da177e4SLinus Torvalds 
8531da177e4SLinus Torvalds 	return err;
8541da177e4SLinus Torvalds }
8551da177e4SLinus Torvalds 
8561da177e4SLinus Torvalds /*
85760236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8581da177e4SLinus Torvalds  */
85960236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8601da177e4SLinus Torvalds {
8612e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8621da177e4SLinus Torvalds }
8631da177e4SLinus Torvalds 
8641da177e4SLinus Torvalds 
865cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
866cfb6eeb4SYOSHIFUJI Hideaki /*
867cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
868cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
869cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
870cfb6eeb4SYOSHIFUJI Hideaki  */
871cfb6eeb4SYOSHIFUJI Hideaki 
872cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
873b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
874a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
875a915da9bSEric Dumazet 					 int family)
876cfb6eeb4SYOSHIFUJI Hideaki {
877fd3a154aSEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
878a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
879a915da9bSEric Dumazet 	unsigned int size = sizeof(struct in_addr);
880fd3a154aSEric Dumazet 	const struct tcp_md5sig_info *md5sig;
881cfb6eeb4SYOSHIFUJI Hideaki 
882a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
883a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
884b4fb05eaSEric Dumazet 				       sock_owned_by_user(sk) ||
885b83e3debSEric Dumazet 				       lockdep_is_held((spinlock_t *)&sk->sk_lock.slock));
886a8afca03SEric Dumazet 	if (!md5sig)
887cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
888a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
889a915da9bSEric Dumazet 	if (family == AF_INET6)
890a915da9bSEric Dumazet 		size = sizeof(struct in6_addr);
891a915da9bSEric Dumazet #endif
892b67bfe0dSSasha Levin 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
893a915da9bSEric Dumazet 		if (key->family != family)
894a915da9bSEric Dumazet 			continue;
895a915da9bSEric Dumazet 		if (!memcmp(&key->addr, addr, size))
896a915da9bSEric Dumazet 			return key;
897cfb6eeb4SYOSHIFUJI Hideaki 	}
898cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
899cfb6eeb4SYOSHIFUJI Hideaki }
900a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup);
901cfb6eeb4SYOSHIFUJI Hideaki 
902b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
903fd3a154aSEric Dumazet 					 const struct sock *addr_sk)
904cfb6eeb4SYOSHIFUJI Hideaki {
905b52e6921SEric Dumazet 	const union tcp_md5_addr *addr;
906a915da9bSEric Dumazet 
907b52e6921SEric Dumazet 	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
908a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
909cfb6eeb4SYOSHIFUJI Hideaki }
910cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
911cfb6eeb4SYOSHIFUJI Hideaki 
912cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
913a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
914a915da9bSEric Dumazet 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
915cfb6eeb4SYOSHIFUJI Hideaki {
916cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
917b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
918cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
919f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
920f6685938SArnaldo Carvalho de Melo 
921c0353c7bSAydin Arik 	key = tcp_md5_do_lookup(sk, addr, family);
922a915da9bSEric Dumazet 	if (key) {
923a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
924a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
925a915da9bSEric Dumazet 		key->keylen = newkeylen;
926a915da9bSEric Dumazet 		return 0;
927cfb6eeb4SYOSHIFUJI Hideaki 	}
928260fcbebSYan, Zheng 
929a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
9301b8e6a01SEric Dumazet 					   sock_owned_by_user(sk) ||
9311b8e6a01SEric Dumazet 					   lockdep_is_held(&sk->sk_lock.slock));
932a915da9bSEric Dumazet 	if (!md5sig) {
933a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
934a915da9bSEric Dumazet 		if (!md5sig)
935a915da9bSEric Dumazet 			return -ENOMEM;
936a915da9bSEric Dumazet 
937a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
938a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
939a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
940a915da9bSEric Dumazet 	}
941a915da9bSEric Dumazet 
9425f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
943a915da9bSEric Dumazet 	if (!key)
944a915da9bSEric Dumazet 		return -ENOMEM;
94571cea17eSEric Dumazet 	if (!tcp_alloc_md5sig_pool()) {
9465f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
947cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
948cfb6eeb4SYOSHIFUJI Hideaki 	}
949f6685938SArnaldo Carvalho de Melo 
950a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
951a915da9bSEric Dumazet 	key->keylen = newkeylen;
952a915da9bSEric Dumazet 	key->family = family;
953a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
954a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
955a915da9bSEric Dumazet 				      sizeof(struct in_addr));
956a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
957cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
958cfb6eeb4SYOSHIFUJI Hideaki }
959a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
960cfb6eeb4SYOSHIFUJI Hideaki 
961a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
962cfb6eeb4SYOSHIFUJI Hideaki {
963a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
964cfb6eeb4SYOSHIFUJI Hideaki 
965c0353c7bSAydin Arik 	key = tcp_md5_do_lookup(sk, addr, family);
966a915da9bSEric Dumazet 	if (!key)
967cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
968a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
9695f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
970a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
971a915da9bSEric Dumazet 	return 0;
972cfb6eeb4SYOSHIFUJI Hideaki }
973a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
974cfb6eeb4SYOSHIFUJI Hideaki 
975e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk)
976cfb6eeb4SYOSHIFUJI Hideaki {
977cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
978a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
979b67bfe0dSSasha Levin 	struct hlist_node *n;
980a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
981cfb6eeb4SYOSHIFUJI Hideaki 
982a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
983a8afca03SEric Dumazet 
984b67bfe0dSSasha Levin 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
985a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
9865f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
987a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
988cfb6eeb4SYOSHIFUJI Hideaki 	}
989cfb6eeb4SYOSHIFUJI Hideaki }
990cfb6eeb4SYOSHIFUJI Hideaki 
991cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
992cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
993cfb6eeb4SYOSHIFUJI Hideaki {
994cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
995cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
996cfb6eeb4SYOSHIFUJI Hideaki 
997cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
998cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
999cfb6eeb4SYOSHIFUJI Hideaki 
1000cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1001cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
1002cfb6eeb4SYOSHIFUJI Hideaki 
1003cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
1004cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1005cfb6eeb4SYOSHIFUJI Hideaki 
100664a124edSDmitry Popov 	if (!cmd.tcpm_keylen)
1007a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1008a915da9bSEric Dumazet 				      AF_INET);
1009cfb6eeb4SYOSHIFUJI Hideaki 
1010cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1011cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1012cfb6eeb4SYOSHIFUJI Hideaki 
1013a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1014a915da9bSEric Dumazet 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1015a915da9bSEric Dumazet 			      GFP_KERNEL);
1016cfb6eeb4SYOSHIFUJI Hideaki }
1017cfb6eeb4SYOSHIFUJI Hideaki 
101849a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
101949a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1020cfb6eeb4SYOSHIFUJI Hideaki {
1021cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
102249a72dfbSAdam Langley 	struct scatterlist sg;
1023cfb6eeb4SYOSHIFUJI Hideaki 
1024cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1025cfb6eeb4SYOSHIFUJI Hideaki 
1026cfb6eeb4SYOSHIFUJI Hideaki 	/*
102749a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1028cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1029cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1030cfb6eeb4SYOSHIFUJI Hideaki 	 */
1031cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1032cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1033cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1034076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
103549a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1036c7da57a1SDavid S. Miller 
103749a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
103849a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
103949a72dfbSAdam Langley }
104049a72dfbSAdam Langley 
1041a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1042318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
104349a72dfbSAdam Langley {
104449a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
104549a72dfbSAdam Langley 	struct hash_desc *desc;
104649a72dfbSAdam Langley 
104749a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
104849a72dfbSAdam Langley 	if (!hp)
104949a72dfbSAdam Langley 		goto clear_hash_noput;
105049a72dfbSAdam Langley 	desc = &hp->md5_desc;
105149a72dfbSAdam Langley 
105249a72dfbSAdam Langley 	if (crypto_hash_init(desc))
105349a72dfbSAdam Langley 		goto clear_hash;
105449a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
105549a72dfbSAdam Langley 		goto clear_hash;
105649a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
105749a72dfbSAdam Langley 		goto clear_hash;
105849a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
105949a72dfbSAdam Langley 		goto clear_hash;
106049a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1061cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1062cfb6eeb4SYOSHIFUJI Hideaki 
1063cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1064cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
106549a72dfbSAdam Langley 
1066cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1067cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1068cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1069cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
107049a72dfbSAdam Langley 	return 1;
1071cfb6eeb4SYOSHIFUJI Hideaki }
1072cfb6eeb4SYOSHIFUJI Hideaki 
107339f8e58eSEric Dumazet int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
107439f8e58eSEric Dumazet 			const struct sock *sk,
1075318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1076cfb6eeb4SYOSHIFUJI Hideaki {
107749a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
107849a72dfbSAdam Langley 	struct hash_desc *desc;
1079318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1080cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1081cfb6eeb4SYOSHIFUJI Hideaki 
108239f8e58eSEric Dumazet 	if (sk) { /* valid for establish/request sockets */
108339f8e58eSEric Dumazet 		saddr = sk->sk_rcv_saddr;
108439f8e58eSEric Dumazet 		daddr = sk->sk_daddr;
1085cfb6eeb4SYOSHIFUJI Hideaki 	} else {
108649a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
108749a72dfbSAdam Langley 		saddr = iph->saddr;
108849a72dfbSAdam Langley 		daddr = iph->daddr;
1089cfb6eeb4SYOSHIFUJI Hideaki 	}
1090cfb6eeb4SYOSHIFUJI Hideaki 
109149a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
109249a72dfbSAdam Langley 	if (!hp)
109349a72dfbSAdam Langley 		goto clear_hash_noput;
109449a72dfbSAdam Langley 	desc = &hp->md5_desc;
109549a72dfbSAdam Langley 
109649a72dfbSAdam Langley 	if (crypto_hash_init(desc))
109749a72dfbSAdam Langley 		goto clear_hash;
109849a72dfbSAdam Langley 
109949a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
110049a72dfbSAdam Langley 		goto clear_hash;
110149a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
110249a72dfbSAdam Langley 		goto clear_hash;
110349a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
110449a72dfbSAdam Langley 		goto clear_hash;
110549a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
110649a72dfbSAdam Langley 		goto clear_hash;
110749a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
110849a72dfbSAdam Langley 		goto clear_hash;
110949a72dfbSAdam Langley 
111049a72dfbSAdam Langley 	tcp_put_md5sig_pool();
111149a72dfbSAdam Langley 	return 0;
111249a72dfbSAdam Langley 
111349a72dfbSAdam Langley clear_hash:
111449a72dfbSAdam Langley 	tcp_put_md5sig_pool();
111549a72dfbSAdam Langley clear_hash_noput:
111649a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
111749a72dfbSAdam Langley 	return 1;
111849a72dfbSAdam Langley }
111949a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1120cfb6eeb4SYOSHIFUJI Hideaki 
1121ba8e275aSEric Dumazet #endif
1122ba8e275aSEric Dumazet 
1123ff74e23fSEric Dumazet /* Called with rcu_read_lock() */
1124ba8e275aSEric Dumazet static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
11259ea88a15SDmitry Popov 				    const struct sk_buff *skb)
1126cfb6eeb4SYOSHIFUJI Hideaki {
1127ba8e275aSEric Dumazet #ifdef CONFIG_TCP_MD5SIG
1128cfb6eeb4SYOSHIFUJI Hideaki 	/*
1129cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1130cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1131cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1132cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1133cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1134cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1135cfb6eeb4SYOSHIFUJI Hideaki 	 */
1136cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1137cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1138eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1139cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1140cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1141cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1142cfb6eeb4SYOSHIFUJI Hideaki 
1143a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1144a915da9bSEric Dumazet 					  AF_INET);
11457d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1146cfb6eeb4SYOSHIFUJI Hideaki 
1147cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1148cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1149a2a385d6SEric Dumazet 		return false;
1150cfb6eeb4SYOSHIFUJI Hideaki 
1151cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1152785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1153a2a385d6SEric Dumazet 		return true;
1154cfb6eeb4SYOSHIFUJI Hideaki 	}
1155cfb6eeb4SYOSHIFUJI Hideaki 
1156cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1157785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1158a2a385d6SEric Dumazet 		return true;
1159cfb6eeb4SYOSHIFUJI Hideaki 	}
1160cfb6eeb4SYOSHIFUJI Hideaki 
1161cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1162cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1163cfb6eeb4SYOSHIFUJI Hideaki 	 */
116449a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1165cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
116639f8e58eSEric Dumazet 				      NULL, skb);
1167cfb6eeb4SYOSHIFUJI Hideaki 
1168cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1169e87cc472SJoe Perches 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1170673d57e7SHarvey Harrison 				     &iph->saddr, ntohs(th->source),
1171673d57e7SHarvey Harrison 				     &iph->daddr, ntohs(th->dest),
1172e87cc472SJoe Perches 				     genhash ? " tcp_v4_calc_md5_hash failed"
1173e87cc472SJoe Perches 				     : "");
1174a2a385d6SEric Dumazet 		return true;
1175cfb6eeb4SYOSHIFUJI Hideaki 	}
1176a2a385d6SEric Dumazet 	return false;
1177cfb6eeb4SYOSHIFUJI Hideaki #endif
1178ba8e275aSEric Dumazet 	return false;
1179ba8e275aSEric Dumazet }
1180cfb6eeb4SYOSHIFUJI Hideaki 
1181b40cf18eSEric Dumazet static void tcp_v4_init_req(struct request_sock *req,
1182b40cf18eSEric Dumazet 			    const struct sock *sk_listener,
118316bea70aSOctavian Purdila 			    struct sk_buff *skb)
118416bea70aSOctavian Purdila {
118516bea70aSOctavian Purdila 	struct inet_request_sock *ireq = inet_rsk(req);
118616bea70aSOctavian Purdila 
118708d2cc3bSEric Dumazet 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
118808d2cc3bSEric Dumazet 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
118908d2cc3bSEric Dumazet 	ireq->no_srccheck = inet_sk(sk_listener)->transparent;
119016bea70aSOctavian Purdila 	ireq->opt = tcp_v4_save_options(skb);
119116bea70aSOctavian Purdila }
119216bea70aSOctavian Purdila 
1193f964629eSEric Dumazet static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1194f964629eSEric Dumazet 					  struct flowi *fl,
1195d94e0417SOctavian Purdila 					  const struct request_sock *req,
1196d94e0417SOctavian Purdila 					  bool *strict)
1197d94e0417SOctavian Purdila {
1198d94e0417SOctavian Purdila 	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1199d94e0417SOctavian Purdila 
1200d94e0417SOctavian Purdila 	if (strict) {
1201d94e0417SOctavian Purdila 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1202d94e0417SOctavian Purdila 			*strict = true;
1203d94e0417SOctavian Purdila 		else
1204d94e0417SOctavian Purdila 			*strict = false;
1205d94e0417SOctavian Purdila 	}
1206d94e0417SOctavian Purdila 
1207d94e0417SOctavian Purdila 	return dst;
1208d94e0417SOctavian Purdila }
1209d94e0417SOctavian Purdila 
121072a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
12111da177e4SLinus Torvalds 	.family		=	PF_INET,
12122e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
12135db92c99SOctavian Purdila 	.rtx_syn_ack	=	tcp_rtx_synack,
121460236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
121560236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12161da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
121772659eccSOctavian Purdila 	.syn_ack_timeout =	tcp_syn_ack_timeout,
12181da177e4SLinus Torvalds };
12191da177e4SLinus Torvalds 
1220b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
12212aec4a29SOctavian Purdila 	.mss_clamp	=	TCP_MSS_DEFAULT,
122216bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG
1223fd3a154aSEric Dumazet 	.req_md5_lookup	=	tcp_v4_md5_lookup,
1224e3afe7b7SJohn Dykstra 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1225b6332e6cSAndrew Morton #endif
122616bea70aSOctavian Purdila 	.init_req	=	tcp_v4_init_req,
1227fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES
1228fb7b37a7SOctavian Purdila 	.cookie_init_seq =	cookie_v4_init_sequence,
1229fb7b37a7SOctavian Purdila #endif
1230d94e0417SOctavian Purdila 	.route_req	=	tcp_v4_route_req,
1231936b8bdbSOctavian Purdila 	.init_seq	=	tcp_v4_init_sequence,
1232d6274bd8SOctavian Purdila 	.send_synack	=	tcp_v4_send_synack,
123316bea70aSOctavian Purdila };
1234cfb6eeb4SYOSHIFUJI Hideaki 
12351da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12361da177e4SLinus Torvalds {
12371da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1238511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
12391da177e4SLinus Torvalds 		goto drop;
12401da177e4SLinus Torvalds 
12411fb6f159SOctavian Purdila 	return tcp_conn_request(&tcp_request_sock_ops,
12421fb6f159SOctavian Purdila 				&tcp_request_sock_ipv4_ops, sk, skb);
12431da177e4SLinus Torvalds 
12441da177e4SLinus Torvalds drop:
1245848bf15fSVijay Subramanian 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
12461da177e4SLinus Torvalds 	return 0;
12471da177e4SLinus Torvalds }
12484bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds 
12511da177e4SLinus Torvalds /*
12521da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
12531da177e4SLinus Torvalds  * now create the new socket.
12541da177e4SLinus Torvalds  */
12550c27171eSEric Dumazet struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
125660236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
12575e0724d0SEric Dumazet 				  struct dst_entry *dst,
12585e0724d0SEric Dumazet 				  struct request_sock *req_unhash,
12595e0724d0SEric Dumazet 				  bool *own_req)
12601da177e4SLinus Torvalds {
12612e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
12621da177e4SLinus Torvalds 	struct inet_sock *newinet;
12631da177e4SLinus Torvalds 	struct tcp_sock *newtp;
12641da177e4SLinus Torvalds 	struct sock *newsk;
1265cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1266cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1267cfb6eeb4SYOSHIFUJI Hideaki #endif
1268f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
12691da177e4SLinus Torvalds 
12701da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
12711da177e4SLinus Torvalds 		goto exit_overflow;
12721da177e4SLinus Torvalds 
12731da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
12741da177e4SLinus Torvalds 	if (!newsk)
1275093d2823SBalazs Scheidler 		goto exit_nonewsk;
12761da177e4SLinus Torvalds 
1277bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1278fae6ef87SNeal Cardwell 	inet_sk_rx_dst_set(newsk, skb);
12791da177e4SLinus Torvalds 
12801da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
12811da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
12822e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1283d1e559d0SEric Dumazet 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
1284d1e559d0SEric Dumazet 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
12856dd9a14eSDavid Ahern 	newsk->sk_bound_dev_if = ireq->ir_iif;
1286634fb979SEric Dumazet 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1287f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1288f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
12892e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1290463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1291eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
12924c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1293d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1294f6d8bd05SEric Dumazet 	if (inet_opt)
1295f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1296c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
12971da177e4SLinus Torvalds 
1298dfd25fffSEric Dumazet 	if (!dst) {
1299dfd25fffSEric Dumazet 		dst = inet_csk_route_child_sock(sk, newsk, req);
1300dfd25fffSEric Dumazet 		if (!dst)
13010e734419SDavid S. Miller 			goto put_and_exit;
1302dfd25fffSEric Dumazet 	} else {
1303dfd25fffSEric Dumazet 		/* syncookie case : see end of cookie_v4_check() */
1304dfd25fffSEric Dumazet 	}
13050e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
13060e734419SDavid S. Miller 
130781164413SDaniel Borkmann 	tcp_ca_openreq_child(newsk, dst);
130881164413SDaniel Borkmann 
13091da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
13100dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
1311f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1312f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1313f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1314f5fff5dcSTom Quetchenbach 
13151da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
13161da177e4SLinus Torvalds 
1317cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1318cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1319a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1320a915da9bSEric Dumazet 				AF_INET);
132100db4124SIan Morris 	if (key) {
1322cfb6eeb4SYOSHIFUJI Hideaki 		/*
1323cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1324cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1325cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1326cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1327cfb6eeb4SYOSHIFUJI Hideaki 		 */
1328a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1329a915da9bSEric Dumazet 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1330a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1331cfb6eeb4SYOSHIFUJI Hideaki 	}
1332cfb6eeb4SYOSHIFUJI Hideaki #endif
1333cfb6eeb4SYOSHIFUJI Hideaki 
13340e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
13350e734419SDavid S. Miller 		goto put_and_exit;
13365e0724d0SEric Dumazet 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1337805c4bc0SEric Dumazet 	if (*own_req)
133849a496c9SEric Dumazet 		tcp_move_syn(newtp, req);
13391da177e4SLinus Torvalds 
13401da177e4SLinus Torvalds 	return newsk;
13411da177e4SLinus Torvalds 
13421da177e4SLinus Torvalds exit_overflow:
1343de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1344093d2823SBalazs Scheidler exit_nonewsk:
1345093d2823SBalazs Scheidler 	dst_release(dst);
13461da177e4SLinus Torvalds exit:
1347de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
13481da177e4SLinus Torvalds 	return NULL;
13490e734419SDavid S. Miller put_and_exit:
1350e337e24dSChristoph Paasch 	inet_csk_prepare_forced_close(newsk);
1351e337e24dSChristoph Paasch 	tcp_done(newsk);
13520e734419SDavid S. Miller 	goto exit;
13531da177e4SLinus Torvalds }
13544bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
13551da177e4SLinus Torvalds 
/* With CONFIG_SYN_COOKIES, a segment without the SYN flag is handed to
 * cookie_v4_check() and its result is returned.  In every other case
 * @sk is returned unchanged.
 */
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v4_check(sk, skb);
#endif
	return sk;
}
13661da177e4SLinus Torvalds 
13671da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
1368e994b2f0SEric Dumazet  * here, unless it is a TCP_LISTEN socket.
13691da177e4SLinus Torvalds  *
13701da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
13711da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
13721da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
13731da177e4SLinus Torvalds  * held.
13741da177e4SLinus Torvalds  */
13751da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
13761da177e4SLinus Torvalds {
1377cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1378cfb6eeb4SYOSHIFUJI Hideaki 
13791da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
138092101b3bSDavid S. Miller 		struct dst_entry *dst = sk->sk_rx_dst;
1381404e0a8bSEric Dumazet 
1382404e0a8bSEric Dumazet 		sock_rps_save_rxhash(sk, skb);
13833d97379aSEric Dumazet 		sk_mark_napi_id(sk, skb);
1384404e0a8bSEric Dumazet 		if (dst) {
1385505fbcf0SEric Dumazet 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
138651456b29SIan Morris 			    !dst->ops->check(dst, 0)) {
138792101b3bSDavid S. Miller 				dst_release(dst);
138892101b3bSDavid S. Miller 				sk->sk_rx_dst = NULL;
138992101b3bSDavid S. Miller 			}
139092101b3bSDavid S. Miller 		}
1391c995ae22SVijay Subramanian 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
13921da177e4SLinus Torvalds 		return 0;
13931da177e4SLinus Torvalds 	}
13941da177e4SLinus Torvalds 
139512e25e10SEric Dumazet 	if (tcp_checksum_complete(skb))
13961da177e4SLinus Torvalds 		goto csum_err;
13971da177e4SLinus Torvalds 
13981da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
1399079096f1SEric Dumazet 		struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1400079096f1SEric Dumazet 
14011da177e4SLinus Torvalds 		if (!nsk)
14021da177e4SLinus Torvalds 			goto discard;
14031da177e4SLinus Torvalds 		if (nsk != sk) {
1404bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
140538cb5245SEric Dumazet 			sk_mark_napi_id(nsk, skb);
1406cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1407cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
14081da177e4SLinus Torvalds 				goto reset;
1409cfb6eeb4SYOSHIFUJI Hideaki 			}
14101da177e4SLinus Torvalds 			return 0;
14111da177e4SLinus Torvalds 		}
1412ca55158cSEric Dumazet 	} else
1413bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1414ca55158cSEric Dumazet 
141572ab4a86SEric Dumazet 	if (tcp_rcv_state_process(sk, skb)) {
1416cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
14171da177e4SLinus Torvalds 		goto reset;
1418cfb6eeb4SYOSHIFUJI Hideaki 	}
14191da177e4SLinus Torvalds 	return 0;
14201da177e4SLinus Torvalds 
14211da177e4SLinus Torvalds reset:
1422cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
14231da177e4SLinus Torvalds discard:
14241da177e4SLinus Torvalds 	kfree_skb(skb);
14251da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
14261da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
14271da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
14281da177e4SLinus Torvalds 	 * but you have been warned.
14291da177e4SLinus Torvalds 	 */
14301da177e4SLinus Torvalds 	return 0;
14311da177e4SLinus Torvalds 
14321da177e4SLinus Torvalds csum_err:
14336a5dc9e5SEric Dumazet 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
143463231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
14351da177e4SLinus Torvalds 	goto discard;
14361da177e4SLinus Torvalds }
14374bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
14381da177e4SLinus Torvalds 
1439160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb)
144041063e9dSDavid S. Miller {
144141063e9dSDavid S. Miller 	const struct iphdr *iph;
144241063e9dSDavid S. Miller 	const struct tcphdr *th;
144341063e9dSDavid S. Miller 	struct sock *sk;
144441063e9dSDavid S. Miller 
144541063e9dSDavid S. Miller 	if (skb->pkt_type != PACKET_HOST)
1446160eb5a6SDavid S. Miller 		return;
144741063e9dSDavid S. Miller 
144845f00f99SEric Dumazet 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1449160eb5a6SDavid S. Miller 		return;
145041063e9dSDavid S. Miller 
145141063e9dSDavid S. Miller 	iph = ip_hdr(skb);
145245f00f99SEric Dumazet 	th = tcp_hdr(skb);
145341063e9dSDavid S. Miller 
145441063e9dSDavid S. Miller 	if (th->doff < sizeof(struct tcphdr) / 4)
1455160eb5a6SDavid S. Miller 		return;
145641063e9dSDavid S. Miller 
145745f00f99SEric Dumazet 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
145841063e9dSDavid S. Miller 				       iph->saddr, th->source,
14597011d085SVijay Subramanian 				       iph->daddr, ntohs(th->dest),
14609cb429d6SEric Dumazet 				       skb->skb_iif);
146141063e9dSDavid S. Miller 	if (sk) {
146241063e9dSDavid S. Miller 		skb->sk = sk;
146341063e9dSDavid S. Miller 		skb->destructor = sock_edemux;
1464f7e4eb03SEric Dumazet 		if (sk_fullsock(sk)) {
1465d0c294c5SMichal Kubeček 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1466505fbcf0SEric Dumazet 
146741063e9dSDavid S. Miller 			if (dst)
146841063e9dSDavid S. Miller 				dst = dst_check(dst, 0);
146992101b3bSDavid S. Miller 			if (dst &&
1470505fbcf0SEric Dumazet 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
147141063e9dSDavid S. Miller 				skb_dst_set_noref(skb, dst);
147241063e9dSDavid S. Miller 		}
147341063e9dSDavid S. Miller 	}
147441063e9dSDavid S. Miller }
147541063e9dSDavid S. Miller 
1476b2fb4f54SEric Dumazet /* Packet is added to VJ-style prequeue for processing in process
1477b2fb4f54SEric Dumazet  * context, if a reader task is waiting. Apparently, this exciting
1478b2fb4f54SEric Dumazet  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1479b2fb4f54SEric Dumazet  * failed somewhere. Latency? Burstiness? Well, at least now we will
1480b2fb4f54SEric Dumazet  * see, why it failed. 8)8)				  --ANK
1481b2fb4f54SEric Dumazet  *
1482b2fb4f54SEric Dumazet  */
1483b2fb4f54SEric Dumazet bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1484b2fb4f54SEric Dumazet {
1485b2fb4f54SEric Dumazet 	struct tcp_sock *tp = tcp_sk(sk);
1486b2fb4f54SEric Dumazet 
1487b2fb4f54SEric Dumazet 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
1488b2fb4f54SEric Dumazet 		return false;
1489b2fb4f54SEric Dumazet 
1490b2fb4f54SEric Dumazet 	if (skb->len <= tcp_hdrlen(skb) &&
1491b2fb4f54SEric Dumazet 	    skb_queue_len(&tp->ucopy.prequeue) == 0)
1492b2fb4f54SEric Dumazet 		return false;
1493b2fb4f54SEric Dumazet 
1494ca777effSEric Dumazet 	/* Before escaping RCU protected region, we need to take care of skb
1495ca777effSEric Dumazet 	 * dst. Prequeue is only enabled for established sockets.
1496ca777effSEric Dumazet 	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
1497ca777effSEric Dumazet 	 * Instead of doing full sk_rx_dst validity here, let's perform
1498ca777effSEric Dumazet 	 * an optimistic check.
1499ca777effSEric Dumazet 	 */
1500ca777effSEric Dumazet 	if (likely(sk->sk_rx_dst))
1501ca777effSEric Dumazet 		skb_dst_drop(skb);
1502ca777effSEric Dumazet 	else
15035037e9efSEric Dumazet 		skb_dst_force_safe(skb);
1504ca777effSEric Dumazet 
1505b2fb4f54SEric Dumazet 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
1506b2fb4f54SEric Dumazet 	tp->ucopy.memory += skb->truesize;
1507b2fb4f54SEric Dumazet 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
1508b2fb4f54SEric Dumazet 		struct sk_buff *skb1;
1509b2fb4f54SEric Dumazet 
1510b2fb4f54SEric Dumazet 		BUG_ON(sock_owned_by_user(sk));
1511b2fb4f54SEric Dumazet 
1512b2fb4f54SEric Dumazet 		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1513b2fb4f54SEric Dumazet 			sk_backlog_rcv(sk, skb1);
1514b2fb4f54SEric Dumazet 			NET_INC_STATS_BH(sock_net(sk),
1515b2fb4f54SEric Dumazet 					 LINUX_MIB_TCPPREQUEUEDROPPED);
1516b2fb4f54SEric Dumazet 		}
1517b2fb4f54SEric Dumazet 
1518b2fb4f54SEric Dumazet 		tp->ucopy.memory = 0;
1519b2fb4f54SEric Dumazet 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1520b2fb4f54SEric Dumazet 		wake_up_interruptible_sync_poll(sk_sleep(sk),
1521b2fb4f54SEric Dumazet 					   POLLIN | POLLRDNORM | POLLRDBAND);
1522b2fb4f54SEric Dumazet 		if (!inet_csk_ack_scheduled(sk))
1523b2fb4f54SEric Dumazet 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1524b2fb4f54SEric Dumazet 						  (3 * tcp_rto_min(sk)) / 4,
1525b2fb4f54SEric Dumazet 						  TCP_RTO_MAX);
1526b2fb4f54SEric Dumazet 	}
1527b2fb4f54SEric Dumazet 	return true;
1528b2fb4f54SEric Dumazet }
1529b2fb4f54SEric Dumazet EXPORT_SYMBOL(tcp_prequeue);
1530b2fb4f54SEric Dumazet 
15311da177e4SLinus Torvalds /*
15321da177e4SLinus Torvalds  *	From tcp_input.c
15331da177e4SLinus Torvalds  */
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
15361da177e4SLinus Torvalds {
1537eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1538cf533ea5SEric Dumazet 	const struct tcphdr *th;
15391da177e4SLinus Torvalds 	struct sock *sk;
15401da177e4SLinus Torvalds 	int ret;
1541a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
15421da177e4SLinus Torvalds 
15431da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
15441da177e4SLinus Torvalds 		goto discard_it;
15451da177e4SLinus Torvalds 
15461da177e4SLinus Torvalds 	/* Count it even if it's bad */
154763231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
15481da177e4SLinus Torvalds 
15491da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
15501da177e4SLinus Torvalds 		goto discard_it;
15511da177e4SLinus Torvalds 
1552aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
15531da177e4SLinus Torvalds 
15541da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
15551da177e4SLinus Torvalds 		goto bad_packet;
15561da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
15571da177e4SLinus Torvalds 		goto discard_it;
15581da177e4SLinus Torvalds 
15591da177e4SLinus Torvalds 	/* An explanation is required here, I think.
15601da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1561caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
15621da177e4SLinus Torvalds 	 * So, we defer the checks. */
1563ed70fcfcSTom Herbert 
1564ed70fcfcSTom Herbert 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
15656a5dc9e5SEric Dumazet 		goto csum_error;
15661da177e4SLinus Torvalds 
1567aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1568eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
1569971f10ecSEric Dumazet 	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1570971f10ecSEric Dumazet 	 * barrier() makes sure compiler wont play fool^Waliasing games.
1571971f10ecSEric Dumazet 	 */
1572971f10ecSEric Dumazet 	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1573971f10ecSEric Dumazet 		sizeof(struct inet_skb_parm));
1574971f10ecSEric Dumazet 	barrier();
1575971f10ecSEric Dumazet 
15761da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
15771da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
15781da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
15791da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1580e11ecddfSEric Dumazet 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
158104317dafSEric Dumazet 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1582b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
15831da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
15841da177e4SLinus Torvalds 
15854bdc3d66SEric Dumazet lookup:
15869a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
15871da177e4SLinus Torvalds 	if (!sk)
15881da177e4SLinus Torvalds 		goto no_tcp_socket;
15891da177e4SLinus Torvalds 
1590bb134d5dSEric Dumazet process:
1591bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1592bb134d5dSEric Dumazet 		goto do_time_wait;
1593bb134d5dSEric Dumazet 
1594079096f1SEric Dumazet 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
1595079096f1SEric Dumazet 		struct request_sock *req = inet_reqsk(sk);
1596079096f1SEric Dumazet 		struct sock *nsk = NULL;
1597079096f1SEric Dumazet 
1598079096f1SEric Dumazet 		sk = req->rsk_listener;
1599079096f1SEric Dumazet 		if (tcp_v4_inbound_md5_hash(sk, skb))
1600079096f1SEric Dumazet 			goto discard_and_relse;
16014bdc3d66SEric Dumazet 		if (likely(sk->sk_state == TCP_LISTEN)) {
1602079096f1SEric Dumazet 			nsk = tcp_check_req(sk, skb, req, false);
16034bdc3d66SEric Dumazet 		} else {
1604f03f2e15SEric Dumazet 			inet_csk_reqsk_queue_drop_and_put(sk, req);
16054bdc3d66SEric Dumazet 			goto lookup;
16064bdc3d66SEric Dumazet 		}
1607079096f1SEric Dumazet 		if (!nsk) {
1608079096f1SEric Dumazet 			reqsk_put(req);
1609079096f1SEric Dumazet 			goto discard_it;
1610079096f1SEric Dumazet 		}
1611079096f1SEric Dumazet 		if (nsk == sk) {
1612079096f1SEric Dumazet 			sock_hold(sk);
1613079096f1SEric Dumazet 			reqsk_put(req);
1614079096f1SEric Dumazet 		} else if (tcp_child_process(sk, nsk, skb)) {
1615079096f1SEric Dumazet 			tcp_v4_send_reset(nsk, skb);
1616079096f1SEric Dumazet 			goto discard_it;
1617079096f1SEric Dumazet 		} else {
1618079096f1SEric Dumazet 			return 0;
1619079096f1SEric Dumazet 		}
1620079096f1SEric Dumazet 	}
16216cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
16226cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1623d218d111SStephen Hemminger 		goto discard_and_relse;
16246cce09f8SEric Dumazet 	}
1625d218d111SStephen Hemminger 
16261da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
16271da177e4SLinus Torvalds 		goto discard_and_relse;
16289ea88a15SDmitry Popov 
16299ea88a15SDmitry Popov 	if (tcp_v4_inbound_md5_hash(sk, skb))
16309ea88a15SDmitry Popov 		goto discard_and_relse;
16319ea88a15SDmitry Popov 
1632b59c2701SPatrick McHardy 	nf_reset(skb);
16331da177e4SLinus Torvalds 
1634fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
16351da177e4SLinus Torvalds 		goto discard_and_relse;
16361da177e4SLinus Torvalds 
16371da177e4SLinus Torvalds 	skb->dev = NULL;
16381da177e4SLinus Torvalds 
1639e994b2f0SEric Dumazet 	if (sk->sk_state == TCP_LISTEN) {
1640e994b2f0SEric Dumazet 		ret = tcp_v4_do_rcv(sk, skb);
1641e994b2f0SEric Dumazet 		goto put_and_return;
1642e994b2f0SEric Dumazet 	}
1643e994b2f0SEric Dumazet 
1644e994b2f0SEric Dumazet 	sk_incoming_cpu_update(sk);
1645e994b2f0SEric Dumazet 
1646c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
16472efd055cSMarcelo Ricardo Leitner 	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
16481da177e4SLinus Torvalds 	ret = 0;
16491da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
16501da177e4SLinus Torvalds 		if (!tcp_prequeue(sk, skb))
16511da177e4SLinus Torvalds 			ret = tcp_v4_do_rcv(sk, skb);
1652da882c1fSEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb,
1653da882c1fSEric Dumazet 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
16546b03a53aSZhu Yi 		bh_unlock_sock(sk);
16556cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
16566b03a53aSZhu Yi 		goto discard_and_relse;
16576b03a53aSZhu Yi 	}
16581da177e4SLinus Torvalds 	bh_unlock_sock(sk);
16591da177e4SLinus Torvalds 
1660e994b2f0SEric Dumazet put_and_return:
16611da177e4SLinus Torvalds 	sock_put(sk);
16621da177e4SLinus Torvalds 
16631da177e4SLinus Torvalds 	return ret;
16641da177e4SLinus Torvalds 
16651da177e4SLinus Torvalds no_tcp_socket:
16661da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
16671da177e4SLinus Torvalds 		goto discard_it;
16681da177e4SLinus Torvalds 
166912e25e10SEric Dumazet 	if (tcp_checksum_complete(skb)) {
16706a5dc9e5SEric Dumazet csum_error:
16716a5dc9e5SEric Dumazet 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
16721da177e4SLinus Torvalds bad_packet:
167363231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
16741da177e4SLinus Torvalds 	} else {
1675cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
16761da177e4SLinus Torvalds 	}
16771da177e4SLinus Torvalds 
16781da177e4SLinus Torvalds discard_it:
16791da177e4SLinus Torvalds 	/* Discard frame. */
16801da177e4SLinus Torvalds 	kfree_skb(skb);
16811da177e4SLinus Torvalds 	return 0;
16821da177e4SLinus Torvalds 
16831da177e4SLinus Torvalds discard_and_relse:
16841da177e4SLinus Torvalds 	sock_put(sk);
16851da177e4SLinus Torvalds 	goto discard_it;
16861da177e4SLinus Torvalds 
16871da177e4SLinus Torvalds do_time_wait:
16881da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
16899469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
16901da177e4SLinus Torvalds 		goto discard_it;
16911da177e4SLinus Torvalds 	}
16921da177e4SLinus Torvalds 
16936a5dc9e5SEric Dumazet 	if (tcp_checksum_complete(skb)) {
16946a5dc9e5SEric Dumazet 		inet_twsk_put(inet_twsk(sk));
16956a5dc9e5SEric Dumazet 		goto csum_error;
16961da177e4SLinus Torvalds 	}
16979469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
16981da177e4SLinus Torvalds 	case TCP_TW_SYN: {
1699c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1700c67499c0SPavel Emelyanov 							&tcp_hashinfo,
1701da5e3630STom Herbert 							iph->saddr, th->source,
1702eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1703463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
17041da177e4SLinus Torvalds 		if (sk2) {
1705dbe7faa4SEric Dumazet 			inet_twsk_deschedule_put(inet_twsk(sk));
17061da177e4SLinus Torvalds 			sk = sk2;
17071da177e4SLinus Torvalds 			goto process;
17081da177e4SLinus Torvalds 		}
17091da177e4SLinus Torvalds 		/* Fall through to ACK */
17101da177e4SLinus Torvalds 	}
17111da177e4SLinus Torvalds 	case TCP_TW_ACK:
17121da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
17131da177e4SLinus Torvalds 		break;
17141da177e4SLinus Torvalds 	case TCP_TW_RST:
1715271c3b9bSFlorian Westphal 		tcp_v4_send_reset(sk, skb);
1716271c3b9bSFlorian Westphal 		inet_twsk_deschedule_put(inet_twsk(sk));
1717271c3b9bSFlorian Westphal 		goto discard_it;
17181da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
17191da177e4SLinus Torvalds 	}
17201da177e4SLinus Torvalds 	goto discard_it;
17211da177e4SLinus Torvalds }
17221da177e4SLinus Torvalds 
1723ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1724ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1725ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1726ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1727ccb7c410SDavid S. Miller };
17281da177e4SLinus Torvalds 
172963d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
17305d299f3dSEric Dumazet {
17315d299f3dSEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
17325d299f3dSEric Dumazet 
17335037e9efSEric Dumazet 	if (dst && dst_hold_safe(dst)) {
17345d299f3dSEric Dumazet 		sk->sk_rx_dst = dst;
17355d299f3dSEric Dumazet 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
17365d299f3dSEric Dumazet 	}
1737ca777effSEric Dumazet }
173863d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set);
17395d299f3dSEric Dumazet 
17403b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
17411da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
17421da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
174332519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
17445d299f3dSEric Dumazet 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
17451da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
17461da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
17471da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
17481da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
17491da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1750543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1751543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1752ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
17533fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
17543fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
17553fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
17563fdadf7dSDmitry Mishin #endif
17574fab9071SNeal Cardwell 	.mtu_reduced	   = tcp_v4_mtu_reduced,
17581da177e4SLinus Torvalds };
17594bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
17601da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature callbacks for IPv4 TCP sockets; installed as
 * tcp_sk(sk)->af_specific by tcp_v4_init_sock().
 */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.md5_parse	= tcp_v4_parse_md5_keys,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
1768cfb6eeb4SYOSHIFUJI Hideaki 
17691da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
17701da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
17711da177e4SLinus Torvalds  */
17721da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
17731da177e4SLinus Torvalds {
17746687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
17751da177e4SLinus Torvalds 
1776900f65d3SNeal Cardwell 	tcp_init_sock(sk);
17771da177e4SLinus Torvalds 
17788292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1779900f65d3SNeal Cardwell 
1780cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1781ac807fa8SDavid S. Miller 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1782cfb6eeb4SYOSHIFUJI Hideaki #endif
17831da177e4SLinus Torvalds 
17841da177e4SLinus Torvalds 	return 0;
17851da177e4SLinus Torvalds }
17861da177e4SLinus Torvalds 
17877d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
17881da177e4SLinus Torvalds {
17891da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
17901da177e4SLinus Torvalds 
17911da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
17921da177e4SLinus Torvalds 
17936687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1794317a76f9SStephen Hemminger 
17951da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
1796fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
17971da177e4SLinus Torvalds 
17981da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
17991da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
18001da177e4SLinus Torvalds 
1801cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1802cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1803cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1804a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
1805a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
1806cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1807cfb6eeb4SYOSHIFUJI Hideaki 	}
1808cfb6eeb4SYOSHIFUJI Hideaki #endif
1809cfb6eeb4SYOSHIFUJI Hideaki 
18101da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
18111da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
18121da177e4SLinus Torvalds 
18131da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1814463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1815ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
18161da177e4SLinus Torvalds 
181700db4124SIan Morris 	BUG_ON(tp->fastopen_rsk);
1818435cf559SWilliam Allen Simpson 
1819cf60af03SYuchung Cheng 	/* If socket is aborted during connect operation */
1820cf60af03SYuchung Cheng 	tcp_free_fastopen_req(tp);
1821cd8ae852SEric Dumazet 	tcp_saved_syn_free(tp);
1822cf60af03SYuchung Cheng 
1823180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
18243d596f7bSJohannes Weiner 
1825baac50bbSJohannes Weiner 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1826d1a4c0b3SGlauber Costa 		sock_release_memcg(sk);
18271da177e4SLinus Torvalds }
18281da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
18291da177e4SLinus Torvalds 
18301da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
18311da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
18321da177e4SLinus Torvalds 
1833a8b690f9STom Herbert /*
1834a8b690f9STom Herbert  * Get next listener socket follow cur.  If cur is NULL, get first socket
1835a8b690f9STom Herbert  * starting from bucket given in st->bucket; when st->bucket is zero the
1836a8b690f9STom Herbert  * very first socket in the hash table is returned.
1837a8b690f9STom Herbert  */
18381da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
18391da177e4SLinus Torvalds {
1840463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
1841c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
18421da177e4SLinus Torvalds 	struct sock *sk = cur;
18435caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
18441da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1845a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
18461da177e4SLinus Torvalds 
18471da177e4SLinus Torvalds 	if (!sk) {
1848a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
18495caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
1850c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
1851a8b690f9STom Herbert 		st->offset = 0;
18521da177e4SLinus Torvalds 		goto get_sk;
18531da177e4SLinus Torvalds 	}
18545caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
18551da177e4SLinus Torvalds 	++st->num;
1856a8b690f9STom Herbert 	++st->offset;
18571da177e4SLinus Torvalds 
18581bde5ac4SEric Dumazet 	sk = sk_nulls_next(sk);
18591da177e4SLinus Torvalds get_sk:
1860c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
18618475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
18628475ef9fSPavel Emelyanov 			continue;
18638475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
18641da177e4SLinus Torvalds 			cur = sk;
18651da177e4SLinus Torvalds 			goto out;
18661da177e4SLinus Torvalds 		}
1867463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
18681da177e4SLinus Torvalds 	}
18695caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
1870a8b690f9STom Herbert 	st->offset = 0;
18710f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
18725caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
18735caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
1874c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
18751da177e4SLinus Torvalds 		goto get_sk;
18761da177e4SLinus Torvalds 	}
18771da177e4SLinus Torvalds 	cur = NULL;
18781da177e4SLinus Torvalds out:
18791da177e4SLinus Torvalds 	return cur;
18801da177e4SLinus Torvalds }
18811da177e4SLinus Torvalds 
18821da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
18831da177e4SLinus Torvalds {
1884a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1885a8b690f9STom Herbert 	void *rc;
1886a8b690f9STom Herbert 
1887a8b690f9STom Herbert 	st->bucket = 0;
1888a8b690f9STom Herbert 	st->offset = 0;
1889a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
18901da177e4SLinus Torvalds 
18911da177e4SLinus Torvalds 	while (rc && *pos) {
18921da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
18931da177e4SLinus Torvalds 		--*pos;
18941da177e4SLinus Torvalds 	}
18951da177e4SLinus Torvalds 	return rc;
18961da177e4SLinus Torvalds }
18971da177e4SLinus Torvalds 
189805dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st)
18996eac5604SAndi Kleen {
190005dbc7b5SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
19016eac5604SAndi Kleen }
19026eac5604SAndi Kleen 
1903a8b690f9STom Herbert /*
1904a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
1905a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
1906a8b690f9STom Herbert  */
19071da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
19081da177e4SLinus Torvalds {
19091da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1910a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19111da177e4SLinus Torvalds 	void *rc = NULL;
19121da177e4SLinus Torvalds 
1913a8b690f9STom Herbert 	st->offset = 0;
1914a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
19151da177e4SLinus Torvalds 		struct sock *sk;
19163ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
19179db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
19181da177e4SLinus Torvalds 
19196eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
19206eac5604SAndi Kleen 		if (empty_bucket(st))
19216eac5604SAndi Kleen 			continue;
19226eac5604SAndi Kleen 
19239db66bdcSEric Dumazet 		spin_lock_bh(lock);
19243ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1925f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
1926878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
19271da177e4SLinus Torvalds 				continue;
19281da177e4SLinus Torvalds 			}
19291da177e4SLinus Torvalds 			rc = sk;
19301da177e4SLinus Torvalds 			goto out;
19311da177e4SLinus Torvalds 		}
19329db66bdcSEric Dumazet 		spin_unlock_bh(lock);
19331da177e4SLinus Torvalds 	}
19341da177e4SLinus Torvalds out:
19351da177e4SLinus Torvalds 	return rc;
19361da177e4SLinus Torvalds }
19371da177e4SLinus Torvalds 
19381da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
19391da177e4SLinus Torvalds {
19401da177e4SLinus Torvalds 	struct sock *sk = cur;
19413ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
19421da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1943a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19441da177e4SLinus Torvalds 
19451da177e4SLinus Torvalds 	++st->num;
1946a8b690f9STom Herbert 	++st->offset;
19471da177e4SLinus Torvalds 
19483ab5aee7SEric Dumazet 	sk = sk_nulls_next(sk);
19491da177e4SLinus Torvalds 
19503ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
1951878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
195205dbc7b5SEric Dumazet 			return sk;
19531da177e4SLinus Torvalds 	}
19541da177e4SLinus Torvalds 
195505dbc7b5SEric Dumazet 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
195605dbc7b5SEric Dumazet 	++st->bucket;
195705dbc7b5SEric Dumazet 	return established_get_first(seq);
19581da177e4SLinus Torvalds }
19591da177e4SLinus Torvalds 
19601da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
19611da177e4SLinus Torvalds {
1962a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1963a8b690f9STom Herbert 	void *rc;
1964a8b690f9STom Herbert 
1965a8b690f9STom Herbert 	st->bucket = 0;
1966a8b690f9STom Herbert 	rc = established_get_first(seq);
19671da177e4SLinus Torvalds 
19681da177e4SLinus Torvalds 	while (rc && pos) {
19691da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
19701da177e4SLinus Torvalds 		--pos;
19711da177e4SLinus Torvalds 	}
19721da177e4SLinus Torvalds 	return rc;
19731da177e4SLinus Torvalds }
19741da177e4SLinus Torvalds 
19751da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
19761da177e4SLinus Torvalds {
19771da177e4SLinus Torvalds 	void *rc;
19781da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
19791da177e4SLinus Torvalds 
19801da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
19811da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
19821da177e4SLinus Torvalds 
19831da177e4SLinus Torvalds 	if (!rc) {
19841da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
19851da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
19861da177e4SLinus Torvalds 	}
19871da177e4SLinus Torvalds 
19881da177e4SLinus Torvalds 	return rc;
19891da177e4SLinus Torvalds }
19901da177e4SLinus Torvalds 
1991a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
1992a8b690f9STom Herbert {
1993a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1994a8b690f9STom Herbert 	int offset = st->offset;
1995a8b690f9STom Herbert 	int orig_num = st->num;
1996a8b690f9STom Herbert 	void *rc = NULL;
1997a8b690f9STom Herbert 
1998a8b690f9STom Herbert 	switch (st->state) {
1999a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2000a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2001a8b690f9STom Herbert 			break;
2002a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2003a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2004a8b690f9STom Herbert 		while (offset-- && rc)
2005a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2006a8b690f9STom Herbert 		if (rc)
2007a8b690f9STom Herbert 			break;
2008a8b690f9STom Herbert 		st->bucket = 0;
200905dbc7b5SEric Dumazet 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2010a8b690f9STom Herbert 		/* Fallthrough */
2011a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2012a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2013a8b690f9STom Herbert 			break;
2014a8b690f9STom Herbert 		rc = established_get_first(seq);
2015a8b690f9STom Herbert 		while (offset-- && rc)
2016a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2017a8b690f9STom Herbert 	}
2018a8b690f9STom Herbert 
2019a8b690f9STom Herbert 	st->num = orig_num;
2020a8b690f9STom Herbert 
2021a8b690f9STom Herbert 	return rc;
2022a8b690f9STom Herbert }
2023a8b690f9STom Herbert 
20241da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
20251da177e4SLinus Torvalds {
20261da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2027a8b690f9STom Herbert 	void *rc;
2028a8b690f9STom Herbert 
2029a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2030a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2031a8b690f9STom Herbert 		if (rc)
2032a8b690f9STom Herbert 			goto out;
2033a8b690f9STom Herbert 	}
2034a8b690f9STom Herbert 
20351da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20361da177e4SLinus Torvalds 	st->num = 0;
2037a8b690f9STom Herbert 	st->bucket = 0;
2038a8b690f9STom Herbert 	st->offset = 0;
2039a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2040a8b690f9STom Herbert 
2041a8b690f9STom Herbert out:
2042a8b690f9STom Herbert 	st->last_pos = *pos;
2043a8b690f9STom Herbert 	return rc;
20441da177e4SLinus Torvalds }
20451da177e4SLinus Torvalds 
20461da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
20471da177e4SLinus Torvalds {
2048a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
20491da177e4SLinus Torvalds 	void *rc = NULL;
20501da177e4SLinus Torvalds 
20511da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
20521da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
20531da177e4SLinus Torvalds 		goto out;
20541da177e4SLinus Torvalds 	}
20551da177e4SLinus Torvalds 
20561da177e4SLinus Torvalds 	switch (st->state) {
20571da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
20581da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
20591da177e4SLinus Torvalds 		if (!rc) {
20601da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2061a8b690f9STom Herbert 			st->bucket = 0;
2062a8b690f9STom Herbert 			st->offset = 0;
20631da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
20641da177e4SLinus Torvalds 		}
20651da177e4SLinus Torvalds 		break;
20661da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
20671da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
20681da177e4SLinus Torvalds 		break;
20691da177e4SLinus Torvalds 	}
20701da177e4SLinus Torvalds out:
20711da177e4SLinus Torvalds 	++*pos;
2072a8b690f9STom Herbert 	st->last_pos = *pos;
20731da177e4SLinus Torvalds 	return rc;
20741da177e4SLinus Torvalds }
20751da177e4SLinus Torvalds 
20761da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
20771da177e4SLinus Torvalds {
20781da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
20791da177e4SLinus Torvalds 
20801da177e4SLinus Torvalds 	switch (st->state) {
20811da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
20821da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
20835caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
20841da177e4SLinus Torvalds 		break;
20851da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
20861da177e4SLinus Torvalds 		if (v)
20879db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
20881da177e4SLinus Torvalds 		break;
20891da177e4SLinus Torvalds 	}
20901da177e4SLinus Torvalds }
20911da177e4SLinus Torvalds 
209273cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
20931da177e4SLinus Torvalds {
2094d9dda78bSAl Viro 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
20951da177e4SLinus Torvalds 	struct tcp_iter_state *s;
209652d6f3f1SDenis V. Lunev 	int err;
20971da177e4SLinus Torvalds 
209852d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
209952d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
210052d6f3f1SDenis V. Lunev 	if (err < 0)
210152d6f3f1SDenis V. Lunev 		return err;
2102f40c8174SDaniel Lezcano 
210352d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
21041da177e4SLinus Torvalds 	s->family		= afinfo->family;
2105a8b690f9STom Herbert 	s->last_pos		= 0;
2106f40c8174SDaniel Lezcano 	return 0;
2107f40c8174SDaniel Lezcano }
210873cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2109f40c8174SDaniel Lezcano 
21106f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
21111da177e4SLinus Torvalds {
21121da177e4SLinus Torvalds 	int rc = 0;
21131da177e4SLinus Torvalds 	struct proc_dir_entry *p;
21141da177e4SLinus Torvalds 
21159427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
21169427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
21179427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
21189427c4b3SDenis V. Lunev 
211984841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
212073cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
212184841c3cSDenis V. Lunev 	if (!p)
21221da177e4SLinus Torvalds 		rc = -ENOMEM;
21231da177e4SLinus Torvalds 	return rc;
21241da177e4SLinus Torvalds }
21254bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
21261da177e4SLinus Torvalds 
21276f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
21281da177e4SLinus Torvalds {
2129ece31ffdSGao feng 	remove_proc_entry(afinfo->name, net->proc_net);
21301da177e4SLinus Torvalds }
21314bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
21321da177e4SLinus Torvalds 
2133d4f06873SEric Dumazet static void get_openreq4(const struct request_sock *req,
2134aa3a0c8cSEric Dumazet 			 struct seq_file *f, int i)
21351da177e4SLinus Torvalds {
21362e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
2137fa76ce73SEric Dumazet 	long delta = req->rsk_timer.expires - jiffies;
21381da177e4SLinus Torvalds 
21395e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2140652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
21411da177e4SLinus Torvalds 		i,
2142634fb979SEric Dumazet 		ireq->ir_loc_addr,
2143d4f06873SEric Dumazet 		ireq->ir_num,
2144634fb979SEric Dumazet 		ireq->ir_rmt_addr,
2145634fb979SEric Dumazet 		ntohs(ireq->ir_rmt_port),
21461da177e4SLinus Torvalds 		TCP_SYN_RECV,
21471da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
21481da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
2149a399a805SEric Dumazet 		jiffies_delta_to_clock_t(delta),
2150e6c022a4SEric Dumazet 		req->num_timeout,
2151aa3a0c8cSEric Dumazet 		from_kuid_munged(seq_user_ns(f),
2152aa3a0c8cSEric Dumazet 				 sock_i_uid(req->rsk_listener)),
21531da177e4SLinus Torvalds 		0,  /* non standard timer */
21541da177e4SLinus Torvalds 		0, /* open_requests have no inode */
2155d4f06873SEric Dumazet 		0,
2156652586dfSTetsuo Handa 		req);
21571da177e4SLinus Torvalds }
21581da177e4SLinus Torvalds 
2159652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
21601da177e4SLinus Torvalds {
21611da177e4SLinus Torvalds 	int timer_active;
21621da177e4SLinus Torvalds 	unsigned long timer_expires;
2163cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2164cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2165cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
21660536fcc0SEric Dumazet 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2167c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2168c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2169c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2170c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
217149d09007SEric Dumazet 	int rx_queue;
217200fd38d9SEric Dumazet 	int state;
21731da177e4SLinus Torvalds 
21746ba8a3b1SNandita Dukkipati 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
21756ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
21766ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
21771da177e4SLinus Torvalds 		timer_active	= 1;
2178463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2179463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
21801da177e4SLinus Torvalds 		timer_active	= 4;
2181463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2182cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
21831da177e4SLinus Torvalds 		timer_active	= 2;
2184cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
21851da177e4SLinus Torvalds 	} else {
21861da177e4SLinus Torvalds 		timer_active	= 0;
21871da177e4SLinus Torvalds 		timer_expires = jiffies;
21881da177e4SLinus Torvalds 	}
21891da177e4SLinus Torvalds 
219000fd38d9SEric Dumazet 	state = sk_state_load(sk);
219100fd38d9SEric Dumazet 	if (state == TCP_LISTEN)
219249d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
219349d09007SEric Dumazet 	else
219400fd38d9SEric Dumazet 		/* Because we don't lock the socket,
219500fd38d9SEric Dumazet 		 * we might find a transient negative value.
219649d09007SEric Dumazet 		 */
219749d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
219849d09007SEric Dumazet 
21995e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2200652586dfSTetsuo Handa 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
220100fd38d9SEric Dumazet 		i, src, srcp, dest, destp, state,
220247da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
220349d09007SEric Dumazet 		rx_queue,
22041da177e4SLinus Torvalds 		timer_active,
2205a399a805SEric Dumazet 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2206463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2207a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
22086687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2209cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2210cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
22117be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
22127be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2213463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
22141da177e4SLinus Torvalds 		tp->snd_cwnd,
221500fd38d9SEric Dumazet 		state == TCP_LISTEN ?
221600fd38d9SEric Dumazet 		    fastopenq->max_qlen :
2217652586dfSTetsuo Handa 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
22181da177e4SLinus Torvalds }
22191da177e4SLinus Torvalds 
2220cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2221652586dfSTetsuo Handa 			       struct seq_file *f, int i)
22221da177e4SLinus Torvalds {
2223789f558cSEric Dumazet 	long delta = tw->tw_timer.expires - jiffies;
222423f33c2dSAl Viro 	__be32 dest, src;
22251da177e4SLinus Torvalds 	__u16 destp, srcp;
22261da177e4SLinus Torvalds 
22271da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
22281da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
22291da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
22301da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
22311da177e4SLinus Torvalds 
22325e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2233652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
22341da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2235a399a805SEric Dumazet 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2236652586dfSTetsuo Handa 		atomic_read(&tw->tw_refcnt), tw);
22371da177e4SLinus Torvalds }
22381da177e4SLinus Torvalds 
22391da177e4SLinus Torvalds #define TMPSZ 150
22401da177e4SLinus Torvalds 
22411da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
22421da177e4SLinus Torvalds {
22431da177e4SLinus Torvalds 	struct tcp_iter_state *st;
224405dbc7b5SEric Dumazet 	struct sock *sk = v;
22451da177e4SLinus Torvalds 
2246652586dfSTetsuo Handa 	seq_setwidth(seq, TMPSZ - 1);
22471da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
2248652586dfSTetsuo Handa 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
22491da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
22501da177e4SLinus Torvalds 			   "inode");
22511da177e4SLinus Torvalds 		goto out;
22521da177e4SLinus Torvalds 	}
22531da177e4SLinus Torvalds 	st = seq->private;
22541da177e4SLinus Torvalds 
225505dbc7b5SEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
2256652586dfSTetsuo Handa 		get_timewait4_sock(v, seq, st->num);
2257079096f1SEric Dumazet 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2258079096f1SEric Dumazet 		get_openreq4(v, seq, st->num);
225905dbc7b5SEric Dumazet 	else
2260652586dfSTetsuo Handa 		get_tcp4_sock(v, seq, st->num);
22611da177e4SLinus Torvalds out:
2262652586dfSTetsuo Handa 	seq_pad(seq, '\n');
22631da177e4SLinus Torvalds 	return 0;
22641da177e4SLinus Torvalds }
22651da177e4SLinus Torvalds 
226673cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
226773cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
226873cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
226973cb88ecSArjan van de Ven 	.read    = seq_read,
227073cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
227173cb88ecSArjan van de Ven 	.release = seq_release_net
227273cb88ecSArjan van de Ven };
227373cb88ecSArjan van de Ven 
22741da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
22751da177e4SLinus Torvalds 	.name		= "tcp",
22761da177e4SLinus Torvalds 	.family		= AF_INET,
227773cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
22789427c4b3SDenis V. Lunev 	.seq_ops	= {
22799427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
22809427c4b3SDenis V. Lunev 	},
22811da177e4SLinus Torvalds };
22821da177e4SLinus Torvalds 
22832c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2284757764f6SPavel Emelyanov {
2285757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2286757764f6SPavel Emelyanov }
2287757764f6SPavel Emelyanov 
22882c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2289757764f6SPavel Emelyanov {
2290757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2291757764f6SPavel Emelyanov }
2292757764f6SPavel Emelyanov 
2293757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2294757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2295757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2296757764f6SPavel Emelyanov };
2297757764f6SPavel Emelyanov 
22981da177e4SLinus Torvalds int __init tcp4_proc_init(void)
22991da177e4SLinus Torvalds {
2300757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
23011da177e4SLinus Torvalds }
23021da177e4SLinus Torvalds 
23031da177e4SLinus Torvalds void tcp4_proc_exit(void)
23041da177e4SLinus Torvalds {
2305757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
23061da177e4SLinus Torvalds }
23071da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
23081da177e4SLinus Torvalds 
23091da177e4SLinus Torvalds struct proto tcp_prot = {
23101da177e4SLinus Torvalds 	.name			= "TCP",
23111da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
23121da177e4SLinus Torvalds 	.close			= tcp_close,
23131da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
23141da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2315463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
23161da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
23171da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
23181da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
23191da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
23201da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
23211da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
23221da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
23237ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
23247ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
23251da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
232646d3ceabSEric Dumazet 	.release_cb		= tcp_release_cb,
2327ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2328ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2329ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
23301da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2331c9bee3b7SEric Dumazet 	.stream_memory_free	= tcp_stream_memory_free,
23321da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
23330a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
23341da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
23351da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
2336a4fe34bfSEric W. Biederman 	.sysctl_mem		= sysctl_tcp_mem,
23371da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
23381da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
23391da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
23401da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
23413ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
23426d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
234360236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
234439d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
23457ba42910SChangli Gao 	.no_autobind		= true,
2346543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2347543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2348543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2349543d9cfeSArnaldo Carvalho de Melo #endif
2350c1e64e29SLorenzo Colitti 	.diag_destroy		= tcp_abort,
23511da177e4SLinus Torvalds };
23524bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
23531da177e4SLinus Torvalds 
2354046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2355046ee902SDenis V. Lunev {
2356bdbbb852SEric Dumazet 	int cpu;
2357bdbbb852SEric Dumazet 
2358bdbbb852SEric Dumazet 	for_each_possible_cpu(cpu)
2359bdbbb852SEric Dumazet 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2360bdbbb852SEric Dumazet 	free_percpu(net->ipv4.tcp_sk);
2361bdbbb852SEric Dumazet }
2362bdbbb852SEric Dumazet 
2363bdbbb852SEric Dumazet static int __net_init tcp_sk_init(struct net *net)
2364bdbbb852SEric Dumazet {
2365bdbbb852SEric Dumazet 	int res, cpu;
2366bdbbb852SEric Dumazet 
2367bdbbb852SEric Dumazet 	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2368bdbbb852SEric Dumazet 	if (!net->ipv4.tcp_sk)
2369bdbbb852SEric Dumazet 		return -ENOMEM;
2370bdbbb852SEric Dumazet 
2371bdbbb852SEric Dumazet 	for_each_possible_cpu(cpu) {
2372bdbbb852SEric Dumazet 		struct sock *sk;
2373bdbbb852SEric Dumazet 
2374bdbbb852SEric Dumazet 		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2375bdbbb852SEric Dumazet 					   IPPROTO_TCP, net);
2376bdbbb852SEric Dumazet 		if (res)
2377bdbbb852SEric Dumazet 			goto fail;
2378bdbbb852SEric Dumazet 		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2379bdbbb852SEric Dumazet 	}
238049213555SDaniel Borkmann 
2381bdbbb852SEric Dumazet 	net->ipv4.sysctl_tcp_ecn = 2;
238249213555SDaniel Borkmann 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
238349213555SDaniel Borkmann 
2384b0f9ca53SFan Du 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
23856b58e0a5SFan Du 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
238605cbc0dbSFan Du 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2387bdbbb852SEric Dumazet 
238813b287e8SNikolay Borisov 	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
23899bd6861bSNikolay Borisov 	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2390b840d15dSNikolay Borisov 	net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
239113b287e8SNikolay Borisov 
239249213555SDaniel Borkmann 	return 0;
2393bdbbb852SEric Dumazet fail:
2394bdbbb852SEric Dumazet 	tcp_sk_exit(net);
2395bdbbb852SEric Dumazet 
2396bdbbb852SEric Dumazet 	return res;
2397b099ce26SEric W. Biederman }
2398b099ce26SEric W. Biederman 
2399b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2400b099ce26SEric W. Biederman {
2401b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2402046ee902SDenis V. Lunev }
2403046ee902SDenis V. Lunev 
2404046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2405046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2406046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2407b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2408046ee902SDenis V. Lunev };
2409046ee902SDenis V. Lunev 
24109b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
24111da177e4SLinus Torvalds {
24125caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
24136a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
24141da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
24151da177e4SLinus Torvalds }
2416