xref: /linux/net/ipv4/tcp_ipv4.c (revision 2efd055c53c06b7e89c167c98069bab9afce7e59)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *		IPv4 specific functions
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *		code split from:
121da177e4SLinus Torvalds  *		linux/ipv4/tcp.c
131da177e4SLinus Torvalds  *		linux/ipv4/tcp_input.c
141da177e4SLinus Torvalds  *		linux/ipv4/tcp_output.c
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *		See tcp.c for author information
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
191da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
201da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
211da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
221da177e4SLinus Torvalds  */
231da177e4SLinus Torvalds 
241da177e4SLinus Torvalds /*
251da177e4SLinus Torvalds  * Changes:
261da177e4SLinus Torvalds  *		David S. Miller	:	New socket lookup architecture.
271da177e4SLinus Torvalds  *					This code is dedicated to John Dyson.
281da177e4SLinus Torvalds  *		David S. Miller :	Change semantics of established hash,
291da177e4SLinus Torvalds  *					half is devoted to TIME_WAIT sockets
301da177e4SLinus Torvalds  *					and the rest go in the other half.
311da177e4SLinus Torvalds  *		Andi Kleen :		Add support for syncookies and fixed
321da177e4SLinus Torvalds  *					some bugs: ip options weren't passed to
331da177e4SLinus Torvalds  *					the TCP layer, missed a check for an
341da177e4SLinus Torvalds  *					ACK bit.
351da177e4SLinus Torvalds  *		Andi Kleen :		Implemented fast path mtu discovery.
361da177e4SLinus Torvalds  *	     				Fixed many serious bugs in the
3760236fddSArnaldo Carvalho de Melo  *					request_sock handling and moved
381da177e4SLinus Torvalds  *					most of it into the af independent code.
391da177e4SLinus Torvalds  *					Added tail drop and some other bugfixes.
40caa20d9aSStephen Hemminger  *					Added new listen semantics.
411da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
421da177e4SLinus Torvalds  *	Juan Jose Ciarlante:		ip_dynaddr bits
431da177e4SLinus Torvalds  *		Andi Kleen:		various fixes.
441da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year
451da177e4SLinus Torvalds  *					coma.
461da177e4SLinus Torvalds  *	Andi Kleen		:	Fix new listen.
471da177e4SLinus Torvalds  *	Andi Kleen		:	Fix accept error reporting.
481da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
491da177e4SLinus Torvalds  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
501da177e4SLinus Torvalds  *					a single port at the same time.
511da177e4SLinus Torvalds  */
521da177e4SLinus Torvalds 
53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
541da177e4SLinus Torvalds 
55eb4dea58SHerbert Xu #include <linux/bottom_half.h>
561da177e4SLinus Torvalds #include <linux/types.h>
571da177e4SLinus Torvalds #include <linux/fcntl.h>
581da177e4SLinus Torvalds #include <linux/module.h>
591da177e4SLinus Torvalds #include <linux/random.h>
601da177e4SLinus Torvalds #include <linux/cache.h>
611da177e4SLinus Torvalds #include <linux/jhash.h>
621da177e4SLinus Torvalds #include <linux/init.h>
631da177e4SLinus Torvalds #include <linux/times.h>
645a0e3ad6STejun Heo #include <linux/slab.h>
651da177e4SLinus Torvalds 
66457c4cbcSEric W. Biederman #include <net/net_namespace.h>
671da177e4SLinus Torvalds #include <net/icmp.h>
68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h>
691da177e4SLinus Torvalds #include <net/tcp.h>
7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h>
711da177e4SLinus Torvalds #include <net/ipv6.h>
721da177e4SLinus Torvalds #include <net/inet_common.h>
736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h>
741da177e4SLinus Torvalds #include <net/xfrm.h>
756e5714eaSDavid S. Miller #include <net/secure_seq.h>
76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h>
77076bb0c8SEliezer Tamir #include <net/busy_poll.h>
781da177e4SLinus Torvalds 
791da177e4SLinus Torvalds #include <linux/inet.h>
801da177e4SLinus Torvalds #include <linux/ipv6.h>
811da177e4SLinus Torvalds #include <linux/stddef.h>
821da177e4SLinus Torvalds #include <linux/proc_fs.h>
831da177e4SLinus Torvalds #include <linux/seq_file.h>
841da177e4SLinus Torvalds 
85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h>
86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h>
87cfb6eeb4SYOSHIFUJI Hideaki 
/*
 * When non-zero, tcp_twsk_unique() may hand the identity of a TIME_WAIT
 * socket to a new connection if the caller passed a non-NULL twp and the
 * TIME_WAIT timestamp is more than one second old.
 */
88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly;
/* NOTE(review): not read in the part of the file visible here; it is
 * exported, so its consumers presumably live elsewhere - confirm.
 */
89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly;
904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency);
911da177e4SLinus Torvalds 
92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
/* Forward declaration, only present when TCP MD5 signatures are built in. */
93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
94318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
95cfb6eeb4SYOSHIFUJI Hideaki #endif
96cfb6eeb4SYOSHIFUJI Hideaki 
/* Socket lookup tables for TCP; tcp_v4_err() passes this to
 * __inet_lookup_established() to find the socket an ICMP error refers to.
 */
975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo;
984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo);
991da177e4SLinus Torvalds 
100936b8bdbSOctavian Purdila static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1011da177e4SLinus Torvalds {
102eddc9ec5SArnaldo Carvalho de Melo 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
103eddc9ec5SArnaldo Carvalho de Melo 					  ip_hdr(skb)->saddr,
104aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->dest,
105aa8223c7SArnaldo Carvalho de Melo 					  tcp_hdr(skb)->source);
1061da177e4SLinus Torvalds }
1071da177e4SLinus Torvalds 
1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
1096d6ee43eSArnaldo Carvalho de Melo {
1106d6ee43eSArnaldo Carvalho de Melo 	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
1116d6ee43eSArnaldo Carvalho de Melo 	struct tcp_sock *tp = tcp_sk(sk);
1126d6ee43eSArnaldo Carvalho de Melo 
1136d6ee43eSArnaldo Carvalho de Melo 	/* With PAWS, it is safe from the viewpoint
1146d6ee43eSArnaldo Carvalho de Melo 	   of data integrity. Even without PAWS it is safe provided sequence
1156d6ee43eSArnaldo Carvalho de Melo 	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
1166d6ee43eSArnaldo Carvalho de Melo 
1176d6ee43eSArnaldo Carvalho de Melo 	   Actually, the idea is close to VJ's one, only timestamp cache is
1186d6ee43eSArnaldo Carvalho de Melo 	   held not per host, but per port pair and TW bucket is used as state
1196d6ee43eSArnaldo Carvalho de Melo 	   holder.
1206d6ee43eSArnaldo Carvalho de Melo 
1216d6ee43eSArnaldo Carvalho de Melo 	   If TW bucket has been already destroyed we fall back to VJ's scheme
1226d6ee43eSArnaldo Carvalho de Melo 	   and use initial timestamp retrieved from peer table.
1236d6ee43eSArnaldo Carvalho de Melo 	 */
1246d6ee43eSArnaldo Carvalho de Melo 	if (tcptw->tw_ts_recent_stamp &&
12551456b29SIan Morris 	    (!twp || (sysctl_tcp_tw_reuse &&
1269d729f72SJames Morris 			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
1276d6ee43eSArnaldo Carvalho de Melo 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
1286d6ee43eSArnaldo Carvalho de Melo 		if (tp->write_seq == 0)
1296d6ee43eSArnaldo Carvalho de Melo 			tp->write_seq = 1;
1306d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
1316d6ee43eSArnaldo Carvalho de Melo 		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
1326d6ee43eSArnaldo Carvalho de Melo 		sock_hold(sktw);
1336d6ee43eSArnaldo Carvalho de Melo 		return 1;
1346d6ee43eSArnaldo Carvalho de Melo 	}
1356d6ee43eSArnaldo Carvalho de Melo 
1366d6ee43eSArnaldo Carvalho de Melo 	return 0;
1376d6ee43eSArnaldo Carvalho de Melo }
1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique);
1396d6ee43eSArnaldo Carvalho de Melo 
1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */
1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
1421da177e4SLinus Torvalds {
1432d7192d6SDavid S. Miller 	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1441da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
1451da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
146dca8b089SDavid S. Miller 	__be16 orig_sport, orig_dport;
147bada8adcSAl Viro 	__be32 daddr, nexthop;
148da905bd1SDavid S. Miller 	struct flowi4 *fl4;
1492d7192d6SDavid S. Miller 	struct rtable *rt;
1501da177e4SLinus Torvalds 	int err;
151f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
1521da177e4SLinus Torvalds 
1531da177e4SLinus Torvalds 	if (addr_len < sizeof(struct sockaddr_in))
1541da177e4SLinus Torvalds 		return -EINVAL;
1551da177e4SLinus Torvalds 
1561da177e4SLinus Torvalds 	if (usin->sin_family != AF_INET)
1571da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
1581da177e4SLinus Torvalds 
1591da177e4SLinus Torvalds 	nexthop = daddr = usin->sin_addr.s_addr;
160f6d8bd05SEric Dumazet 	inet_opt = rcu_dereference_protected(inet->inet_opt,
161f6d8bd05SEric Dumazet 					     sock_owned_by_user(sk));
162f6d8bd05SEric Dumazet 	if (inet_opt && inet_opt->opt.srr) {
1631da177e4SLinus Torvalds 		if (!daddr)
1641da177e4SLinus Torvalds 			return -EINVAL;
165f6d8bd05SEric Dumazet 		nexthop = inet_opt->opt.faddr;
1661da177e4SLinus Torvalds 	}
1671da177e4SLinus Torvalds 
168dca8b089SDavid S. Miller 	orig_sport = inet->inet_sport;
169dca8b089SDavid S. Miller 	orig_dport = usin->sin_port;
170da905bd1SDavid S. Miller 	fl4 = &inet->cork.fl.u.ip4;
171da905bd1SDavid S. Miller 	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
1721da177e4SLinus Torvalds 			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
1731da177e4SLinus Torvalds 			      IPPROTO_TCP,
1740e0d44abSSteffen Klassert 			      orig_sport, orig_dport, sk);
175b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
176b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
177b23dd4feSDavid S. Miller 		if (err == -ENETUNREACH)
178f1d8cba6SEric Dumazet 			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
179b23dd4feSDavid S. Miller 		return err;
180584bdf8cSWei Dong 	}
1811da177e4SLinus Torvalds 
1821da177e4SLinus Torvalds 	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
1831da177e4SLinus Torvalds 		ip_rt_put(rt);
1841da177e4SLinus Torvalds 		return -ENETUNREACH;
1851da177e4SLinus Torvalds 	}
1861da177e4SLinus Torvalds 
187f6d8bd05SEric Dumazet 	if (!inet_opt || !inet_opt->opt.srr)
188da905bd1SDavid S. Miller 		daddr = fl4->daddr;
1891da177e4SLinus Torvalds 
190c720c7e8SEric Dumazet 	if (!inet->inet_saddr)
191da905bd1SDavid S. Miller 		inet->inet_saddr = fl4->saddr;
192d1e559d0SEric Dumazet 	sk_rcv_saddr_set(sk, inet->inet_saddr);
1931da177e4SLinus Torvalds 
194c720c7e8SEric Dumazet 	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1951da177e4SLinus Torvalds 		/* Reset inherited state */
1961da177e4SLinus Torvalds 		tp->rx_opt.ts_recent	   = 0;
1971da177e4SLinus Torvalds 		tp->rx_opt.ts_recent_stamp = 0;
198ee995283SPavel Emelyanov 		if (likely(!tp->repair))
1991da177e4SLinus Torvalds 			tp->write_seq	   = 0;
2001da177e4SLinus Torvalds 	}
2011da177e4SLinus Torvalds 
202295ff7edSArnaldo Carvalho de Melo 	if (tcp_death_row.sysctl_tw_recycle &&
20381166dd6SDavid S. Miller 	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
20481166dd6SDavid S. Miller 		tcp_fetch_timewait_stamp(sk, &rt->dst);
2051da177e4SLinus Torvalds 
206c720c7e8SEric Dumazet 	inet->inet_dport = usin->sin_port;
207d1e559d0SEric Dumazet 	sk_daddr_set(sk, daddr);
2081da177e4SLinus Torvalds 
209d83d8461SArnaldo Carvalho de Melo 	inet_csk(sk)->icsk_ext_hdr_len = 0;
210f6d8bd05SEric Dumazet 	if (inet_opt)
211f6d8bd05SEric Dumazet 		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
2121da177e4SLinus Torvalds 
213bee7ca9eSWilliam Allen Simpson 	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
2141da177e4SLinus Torvalds 
2151da177e4SLinus Torvalds 	/* Socket identity is still unknown (sport may be zero).
2161da177e4SLinus Torvalds 	 * However we set state to SYN-SENT and not releasing socket
2171da177e4SLinus Torvalds 	 * lock select source port, enter ourselves into the hash tables and
2181da177e4SLinus Torvalds 	 * complete initialization after this.
2191da177e4SLinus Torvalds 	 */
2201da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_SYN_SENT);
221a7f5e7f1SArnaldo Carvalho de Melo 	err = inet_hash_connect(&tcp_death_row, sk);
2221da177e4SLinus Torvalds 	if (err)
2231da177e4SLinus Torvalds 		goto failure;
2241da177e4SLinus Torvalds 
2259e7ceb06SSathya Perla 	inet_set_txhash(sk);
2269e7ceb06SSathya Perla 
227da905bd1SDavid S. Miller 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
228c720c7e8SEric Dumazet 			       inet->inet_sport, inet->inet_dport, sk);
229b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
230b23dd4feSDavid S. Miller 		err = PTR_ERR(rt);
231b23dd4feSDavid S. Miller 		rt = NULL;
2321da177e4SLinus Torvalds 		goto failure;
233b23dd4feSDavid S. Miller 	}
2341da177e4SLinus Torvalds 	/* OK, now commit destination to socket.  */
235bcd76111SHerbert Xu 	sk->sk_gso_type = SKB_GSO_TCPV4;
236d8d1f30bSChangli Gao 	sk_setup_caps(sk, &rt->dst);
2371da177e4SLinus Torvalds 
238ee995283SPavel Emelyanov 	if (!tp->write_seq && likely(!tp->repair))
239c720c7e8SEric Dumazet 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240c720c7e8SEric Dumazet 							   inet->inet_daddr,
241c720c7e8SEric Dumazet 							   inet->inet_sport,
2421da177e4SLinus Torvalds 							   usin->sin_port);
2431da177e4SLinus Torvalds 
244c720c7e8SEric Dumazet 	inet->inet_id = tp->write_seq ^ jiffies;
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 	err = tcp_connect(sk);
247ee995283SPavel Emelyanov 
2481da177e4SLinus Torvalds 	rt = NULL;
2491da177e4SLinus Torvalds 	if (err)
2501da177e4SLinus Torvalds 		goto failure;
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds 	return 0;
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds failure:
2557174259eSArnaldo Carvalho de Melo 	/*
2567174259eSArnaldo Carvalho de Melo 	 * This unhashes the socket and releases the local port,
2577174259eSArnaldo Carvalho de Melo 	 * if necessary.
2587174259eSArnaldo Carvalho de Melo 	 */
2591da177e4SLinus Torvalds 	tcp_set_state(sk, TCP_CLOSE);
2601da177e4SLinus Torvalds 	ip_rt_put(rt);
2611da177e4SLinus Torvalds 	sk->sk_route_caps = 0;
262c720c7e8SEric Dumazet 	inet->inet_dport = 0;
2631da177e4SLinus Torvalds 	return err;
2641da177e4SLinus Torvalds }
2654bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect);
2661da177e4SLinus Torvalds 
2671da177e4SLinus Torvalds /*
268563d34d0SEric Dumazet  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
269563d34d0SEric Dumazet  * It can be called through tcp_release_cb() if socket was owned by user
270563d34d0SEric Dumazet  * at the time tcp_v4_err() was called to handle ICMP message.
2711da177e4SLinus Torvalds  */
2724fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk)
2731da177e4SLinus Torvalds {
2741da177e4SLinus Torvalds 	struct dst_entry *dst;
2751da177e4SLinus Torvalds 	struct inet_sock *inet = inet_sk(sk);
276563d34d0SEric Dumazet 	u32 mtu = tcp_sk(sk)->mtu_info;
2771da177e4SLinus Torvalds 
27880d0a69fSDavid S. Miller 	dst = inet_csk_update_pmtu(sk, mtu);
27980d0a69fSDavid S. Miller 	if (!dst)
2801da177e4SLinus Torvalds 		return;
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds 	/* Something is about to be wrong... Remember soft error
2831da177e4SLinus Torvalds 	 * for the case, if this connection will not able to recover.
2841da177e4SLinus Torvalds 	 */
2851da177e4SLinus Torvalds 	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
2861da177e4SLinus Torvalds 		sk->sk_err_soft = EMSGSIZE;
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	mtu = dst_mtu(dst);
2891da177e4SLinus Torvalds 
2901da177e4SLinus Torvalds 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
291482fc609SHannes Frederic Sowa 	    ip_sk_accept_pmtu(sk) &&
292d83d8461SArnaldo Carvalho de Melo 	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
2931da177e4SLinus Torvalds 		tcp_sync_mss(sk, mtu);
2941da177e4SLinus Torvalds 
2951da177e4SLinus Torvalds 		/* Resend the TCP packet because it's
2961da177e4SLinus Torvalds 		 * clear that the old packet has been
2971da177e4SLinus Torvalds 		 * dropped. This is the new "fast" path mtu
2981da177e4SLinus Torvalds 		 * discovery.
2991da177e4SLinus Torvalds 		 */
3001da177e4SLinus Torvalds 		tcp_simple_retransmit(sk);
3011da177e4SLinus Torvalds 	} /* else let the usual retransmit timer handle it */
3021da177e4SLinus Torvalds }
3034fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced);
3041da177e4SLinus Torvalds 
30555be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk)
30655be7a9cSDavid S. Miller {
30755be7a9cSDavid S. Miller 	struct dst_entry *dst = __sk_dst_check(sk, 0);
30855be7a9cSDavid S. Miller 
3091ed5c48fSDavid S. Miller 	if (dst)
3106700c270SDavid S. Miller 		dst->ops->redirect(dst, sk, skb);
31155be7a9cSDavid S. Miller }
31255be7a9cSDavid S. Miller 
31326e37360SEric Dumazet 
31426e37360SEric Dumazet /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
31526e37360SEric Dumazet void tcp_req_err(struct sock *sk, u32 seq)
31626e37360SEric Dumazet {
31726e37360SEric Dumazet 	struct request_sock *req = inet_reqsk(sk);
31826e37360SEric Dumazet 	struct net *net = sock_net(sk);
31926e37360SEric Dumazet 
32026e37360SEric Dumazet 	/* ICMPs are not backlogged, hence we cannot get
32126e37360SEric Dumazet 	 * an established socket here.
32226e37360SEric Dumazet 	 */
32326e37360SEric Dumazet 	WARN_ON(req->sk);
32426e37360SEric Dumazet 
32526e37360SEric Dumazet 	if (seq != tcp_rsk(req)->snt_isn) {
32626e37360SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
327c6973669SFan Du 		reqsk_put(req);
32826e37360SEric Dumazet 	} else {
32926e37360SEric Dumazet 		/*
33026e37360SEric Dumazet 		 * Still in SYN_RECV, just remove it silently.
33126e37360SEric Dumazet 		 * There is no good way to pass the error to the newly
33226e37360SEric Dumazet 		 * created socket, and POSIX does not want network
33326e37360SEric Dumazet 		 * errors returned from accept().
33426e37360SEric Dumazet 		 */
33526e37360SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
336c6973669SFan Du 		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
33726e37360SEric Dumazet 	}
33826e37360SEric Dumazet }
33926e37360SEric Dumazet EXPORT_SYMBOL(tcp_req_err);
34026e37360SEric Dumazet 
3411da177e4SLinus Torvalds /*
3421da177e4SLinus Torvalds  * This routine is called by the ICMP module when it gets some
3431da177e4SLinus Torvalds  * sort of error condition.  If err < 0 then the socket should
3441da177e4SLinus Torvalds  * be closed and the error returned to the user.  If err > 0
3451da177e4SLinus Torvalds  * it's just the icmp type << 8 | icmp code.  After adjustment
3461da177e4SLinus Torvalds  * header points to the first 8 bytes of the tcp header.  We need
3471da177e4SLinus Torvalds  * to find the appropriate port.
3481da177e4SLinus Torvalds  *
3491da177e4SLinus Torvalds  * The locking strategy used here is very "optimistic". When
3501da177e4SLinus Torvalds  * someone else accesses the socket the ICMP is just dropped
3511da177e4SLinus Torvalds  * and for some paths there is no check at all.
3521da177e4SLinus Torvalds  * A more general error queue to queue errors for later handling
3531da177e4SLinus Torvalds  * is probably better.
3541da177e4SLinus Torvalds  *
3551da177e4SLinus Torvalds  */
3561da177e4SLinus Torvalds 
3574d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
3581da177e4SLinus Torvalds {
359b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
3604d1a2d9eSDamian Lukowski 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
361f1ecd5d9SDamian Lukowski 	struct inet_connection_sock *icsk;
3621da177e4SLinus Torvalds 	struct tcp_sock *tp;
3631da177e4SLinus Torvalds 	struct inet_sock *inet;
3644d1a2d9eSDamian Lukowski 	const int type = icmp_hdr(icmp_skb)->type;
3654d1a2d9eSDamian Lukowski 	const int code = icmp_hdr(icmp_skb)->code;
3661da177e4SLinus Torvalds 	struct sock *sk;
367f1ecd5d9SDamian Lukowski 	struct sk_buff *skb;
3680a672f74SYuchung Cheng 	struct request_sock *fastopen;
3690a672f74SYuchung Cheng 	__u32 seq, snd_una;
370f1ecd5d9SDamian Lukowski 	__u32 remaining;
3711da177e4SLinus Torvalds 	int err;
3724d1a2d9eSDamian Lukowski 	struct net *net = dev_net(icmp_skb->dev);
3731da177e4SLinus Torvalds 
37426e37360SEric Dumazet 	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
37526e37360SEric Dumazet 				       th->dest, iph->saddr, ntohs(th->source),
37626e37360SEric Dumazet 				       inet_iif(icmp_skb));
3771da177e4SLinus Torvalds 	if (!sk) {
378dcfc23caSPavel Emelyanov 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
3791da177e4SLinus Torvalds 		return;
3801da177e4SLinus Torvalds 	}
3811da177e4SLinus Torvalds 	if (sk->sk_state == TCP_TIME_WAIT) {
3829469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
3831da177e4SLinus Torvalds 		return;
3841da177e4SLinus Torvalds 	}
38526e37360SEric Dumazet 	seq = ntohl(th->seq);
38626e37360SEric Dumazet 	if (sk->sk_state == TCP_NEW_SYN_RECV)
38726e37360SEric Dumazet 		return tcp_req_err(sk, seq);
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	bh_lock_sock(sk);
3901da177e4SLinus Torvalds 	/* If too many ICMPs get dropped on busy
3911da177e4SLinus Torvalds 	 * servers this needs to be solved differently.
392563d34d0SEric Dumazet 	 * We do take care of PMTU discovery (RFC1191) special case :
393563d34d0SEric Dumazet 	 * we can receive locally generated ICMP messages while socket is held.
3941da177e4SLinus Torvalds 	 */
395b74aa930SEric Dumazet 	if (sock_owned_by_user(sk)) {
396b74aa930SEric Dumazet 		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
397de0744afSPavel Emelyanov 			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
398b74aa930SEric Dumazet 	}
3991da177e4SLinus Torvalds 	if (sk->sk_state == TCP_CLOSE)
4001da177e4SLinus Torvalds 		goto out;
4011da177e4SLinus Torvalds 
40297e3ecd1Sstephen hemminger 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
40397e3ecd1Sstephen hemminger 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
40497e3ecd1Sstephen hemminger 		goto out;
40597e3ecd1Sstephen hemminger 	}
40697e3ecd1Sstephen hemminger 
407f1ecd5d9SDamian Lukowski 	icsk = inet_csk(sk);
4081da177e4SLinus Torvalds 	tp = tcp_sk(sk);
4090a672f74SYuchung Cheng 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
4100a672f74SYuchung Cheng 	fastopen = tp->fastopen_rsk;
4110a672f74SYuchung Cheng 	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
4121da177e4SLinus Torvalds 	if (sk->sk_state != TCP_LISTEN &&
4130a672f74SYuchung Cheng 	    !between(seq, snd_una, tp->snd_nxt)) {
414de0744afSPavel Emelyanov 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
4151da177e4SLinus Torvalds 		goto out;
4161da177e4SLinus Torvalds 	}
4171da177e4SLinus Torvalds 
4181da177e4SLinus Torvalds 	switch (type) {
41955be7a9cSDavid S. Miller 	case ICMP_REDIRECT:
42055be7a9cSDavid S. Miller 		do_redirect(icmp_skb, sk);
42155be7a9cSDavid S. Miller 		goto out;
4221da177e4SLinus Torvalds 	case ICMP_SOURCE_QUENCH:
4231da177e4SLinus Torvalds 		/* Just silently ignore these. */
4241da177e4SLinus Torvalds 		goto out;
4251da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4261da177e4SLinus Torvalds 		err = EPROTO;
4271da177e4SLinus Torvalds 		break;
4281da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4291da177e4SLinus Torvalds 		if (code > NR_ICMP_UNREACH)
4301da177e4SLinus Torvalds 			goto out;
4311da177e4SLinus Torvalds 
4321da177e4SLinus Torvalds 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
4330d4f0608SEric Dumazet 			/* We are not interested in TCP_LISTEN and open_requests
4340d4f0608SEric Dumazet 			 * (SYN-ACKs send out by Linux are always <576bytes so
4350d4f0608SEric Dumazet 			 * they should go through unfragmented).
4360d4f0608SEric Dumazet 			 */
4370d4f0608SEric Dumazet 			if (sk->sk_state == TCP_LISTEN)
4380d4f0608SEric Dumazet 				goto out;
4390d4f0608SEric Dumazet 
440563d34d0SEric Dumazet 			tp->mtu_info = info;
441144d56e9SEric Dumazet 			if (!sock_owned_by_user(sk)) {
442563d34d0SEric Dumazet 				tcp_v4_mtu_reduced(sk);
443144d56e9SEric Dumazet 			} else {
444144d56e9SEric Dumazet 				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
445144d56e9SEric Dumazet 					sock_hold(sk);
446144d56e9SEric Dumazet 			}
4471da177e4SLinus Torvalds 			goto out;
4481da177e4SLinus Torvalds 		}
4491da177e4SLinus Torvalds 
4501da177e4SLinus Torvalds 		err = icmp_err_convert[code].errno;
451f1ecd5d9SDamian Lukowski 		/* check if icmp_skb allows revert of backoff
452f1ecd5d9SDamian Lukowski 		 * (see draft-zimmermann-tcp-lcd) */
453f1ecd5d9SDamian Lukowski 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
454f1ecd5d9SDamian Lukowski 			break;
455f1ecd5d9SDamian Lukowski 		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
4560a672f74SYuchung Cheng 		    !icsk->icsk_backoff || fastopen)
457f1ecd5d9SDamian Lukowski 			break;
458f1ecd5d9SDamian Lukowski 
4598f49c270SDavid S. Miller 		if (sock_owned_by_user(sk))
4608f49c270SDavid S. Miller 			break;
4618f49c270SDavid S. Miller 
462f1ecd5d9SDamian Lukowski 		icsk->icsk_backoff--;
463fcdd1cf4SEric Dumazet 		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
464fcdd1cf4SEric Dumazet 					       TCP_TIMEOUT_INIT;
465fcdd1cf4SEric Dumazet 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
466f1ecd5d9SDamian Lukowski 
467f1ecd5d9SDamian Lukowski 		skb = tcp_write_queue_head(sk);
468f1ecd5d9SDamian Lukowski 		BUG_ON(!skb);
469f1ecd5d9SDamian Lukowski 
4707faee5c0SEric Dumazet 		remaining = icsk->icsk_rto -
4717faee5c0SEric Dumazet 			    min(icsk->icsk_rto,
4727faee5c0SEric Dumazet 				tcp_time_stamp - tcp_skb_timestamp(skb));
473f1ecd5d9SDamian Lukowski 
474f1ecd5d9SDamian Lukowski 		if (remaining) {
475f1ecd5d9SDamian Lukowski 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
476f1ecd5d9SDamian Lukowski 						  remaining, TCP_RTO_MAX);
477f1ecd5d9SDamian Lukowski 		} else {
478f1ecd5d9SDamian Lukowski 			/* RTO revert clocked out retransmission.
479f1ecd5d9SDamian Lukowski 			 * Will retransmit now */
480f1ecd5d9SDamian Lukowski 			tcp_retransmit_timer(sk);
481f1ecd5d9SDamian Lukowski 		}
482f1ecd5d9SDamian Lukowski 
4831da177e4SLinus Torvalds 		break;
4841da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4851da177e4SLinus Torvalds 		err = EHOSTUNREACH;
4861da177e4SLinus Torvalds 		break;
4871da177e4SLinus Torvalds 	default:
4881da177e4SLinus Torvalds 		goto out;
4891da177e4SLinus Torvalds 	}
4901da177e4SLinus Torvalds 
4911da177e4SLinus Torvalds 	switch (sk->sk_state) {
4921da177e4SLinus Torvalds 	case TCP_SYN_SENT:
4930a672f74SYuchung Cheng 	case TCP_SYN_RECV:
4940a672f74SYuchung Cheng 		/* Only in fast or simultaneous open. If a fast open socket is
4950a672f74SYuchung Cheng 		 * is already accepted it is treated as a connected one below.
4961da177e4SLinus Torvalds 		 */
49751456b29SIan Morris 		if (fastopen && !fastopen->sk)
4980a672f74SYuchung Cheng 			break;
4990a672f74SYuchung Cheng 
5001da177e4SLinus Torvalds 		if (!sock_owned_by_user(sk)) {
5011da177e4SLinus Torvalds 			sk->sk_err = err;
5021da177e4SLinus Torvalds 
5031da177e4SLinus Torvalds 			sk->sk_error_report(sk);
5041da177e4SLinus Torvalds 
5051da177e4SLinus Torvalds 			tcp_done(sk);
5061da177e4SLinus Torvalds 		} else {
5071da177e4SLinus Torvalds 			sk->sk_err_soft = err;
5081da177e4SLinus Torvalds 		}
5091da177e4SLinus Torvalds 		goto out;
5101da177e4SLinus Torvalds 	}
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds 	/* If we've already connected we will keep trying
5131da177e4SLinus Torvalds 	 * until we time out, or the user gives up.
5141da177e4SLinus Torvalds 	 *
5151da177e4SLinus Torvalds 	 * rfc1122 4.2.3.9 allows to consider as hard errors
5161da177e4SLinus Torvalds 	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
5171da177e4SLinus Torvalds 	 * but it is obsoleted by pmtu discovery).
5181da177e4SLinus Torvalds 	 *
5191da177e4SLinus Torvalds 	 * Note, that in modern internet, where routing is unreliable
5201da177e4SLinus Torvalds 	 * and in each dark corner broken firewalls sit, sending random
5211da177e4SLinus Torvalds 	 * errors ordered by their masters even this two messages finally lose
5221da177e4SLinus Torvalds 	 * their original sense (even Linux sends invalid PORT_UNREACHs)
5231da177e4SLinus Torvalds 	 *
5241da177e4SLinus Torvalds 	 * Now we are in compliance with RFCs.
5251da177e4SLinus Torvalds 	 *							--ANK (980905)
5261da177e4SLinus Torvalds 	 */
5271da177e4SLinus Torvalds 
5281da177e4SLinus Torvalds 	inet = inet_sk(sk);
5291da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk) && inet->recverr) {
5301da177e4SLinus Torvalds 		sk->sk_err = err;
5311da177e4SLinus Torvalds 		sk->sk_error_report(sk);
5321da177e4SLinus Torvalds 	} else	{ /* Only an error on timeout */
5331da177e4SLinus Torvalds 		sk->sk_err_soft = err;
5341da177e4SLinus Torvalds 	}
5351da177e4SLinus Torvalds 
5361da177e4SLinus Torvalds out:
5371da177e4SLinus Torvalds 	bh_unlock_sock(sk);
5381da177e4SLinus Torvalds 	sock_put(sk);
5391da177e4SLinus Torvalds }
5401da177e4SLinus Torvalds 
54128850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
5421da177e4SLinus Torvalds {
543aa8223c7SArnaldo Carvalho de Melo 	struct tcphdr *th = tcp_hdr(skb);
5441da177e4SLinus Torvalds 
54584fa7933SPatrick McHardy 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
546419f9f89SHerbert Xu 		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
547663ead3bSHerbert Xu 		skb->csum_start = skb_transport_header(skb) - skb->head;
548ff1dcadbSAl Viro 		skb->csum_offset = offsetof(struct tcphdr, check);
5491da177e4SLinus Torvalds 	} else {
550419f9f89SHerbert Xu 		th->check = tcp_v4_check(skb->len, saddr, daddr,
55107f0757aSJoe Perches 					 csum_partial(th,
5521da177e4SLinus Torvalds 						      th->doff << 2,
5531da177e4SLinus Torvalds 						      skb->csum));
5541da177e4SLinus Torvalds 	}
5551da177e4SLinus Torvalds }
5561da177e4SLinus Torvalds 
557419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */
558bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
559419f9f89SHerbert Xu {
560cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
561419f9f89SHerbert Xu 
562419f9f89SHerbert Xu 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
563419f9f89SHerbert Xu }
5644bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check);
565419f9f89SHerbert Xu 
5661da177e4SLinus Torvalds /*
5671da177e4SLinus Torvalds  *	This routine will send an RST to the other tcp.
5681da177e4SLinus Torvalds  *
5691da177e4SLinus Torvalds  *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
5701da177e4SLinus Torvalds  *		      for reset.
5711da177e4SLinus Torvalds  *	Answer: if a packet caused RST, it is not for a socket
5721da177e4SLinus Torvalds  *		existing in our system, if it is matched to a socket,
5731da177e4SLinus Torvalds  *		it is just duplicate segment or bug in other side's TCP.
5741da177e4SLinus Torvalds  *		So that we build reply only basing on parameters
5751da177e4SLinus Torvalds  *		arrived with segment.
5761da177e4SLinus Torvalds  *	Exception: precedence violation. We do not implement it in any case.
5771da177e4SLinus Torvalds  */
5781da177e4SLinus Torvalds 
579cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
5801da177e4SLinus Torvalds {
581cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
582cfb6eeb4SYOSHIFUJI Hideaki 	struct {
583cfb6eeb4SYOSHIFUJI Hideaki 		struct tcphdr th;
584cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
585714e85beSAl Viro 		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
586cfb6eeb4SYOSHIFUJI Hideaki #endif
587cfb6eeb4SYOSHIFUJI Hideaki 	} rep;
5881da177e4SLinus Torvalds 	struct ip_reply_arg arg;
589cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
590cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
591658ddaafSShawn Lu 	const __u8 *hash_location = NULL;
592658ddaafSShawn Lu 	unsigned char newhash[16];
593658ddaafSShawn Lu 	int genhash;
594658ddaafSShawn Lu 	struct sock *sk1 = NULL;
595cfb6eeb4SYOSHIFUJI Hideaki #endif
596a86b1e30SPavel Emelyanov 	struct net *net;
5971da177e4SLinus Torvalds 
5981da177e4SLinus Torvalds 	/* Never send a reset in response to a reset. */
5991da177e4SLinus Torvalds 	if (th->rst)
6001da177e4SLinus Torvalds 		return;
6011da177e4SLinus Torvalds 
602c3658e8dSEric Dumazet 	/* If sk not NULL, it means we did a successful lookup and incoming
603c3658e8dSEric Dumazet 	 * route had to be correct. prequeue might have dropped our dst.
604c3658e8dSEric Dumazet 	 */
605c3658e8dSEric Dumazet 	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
6061da177e4SLinus Torvalds 		return;
6071da177e4SLinus Torvalds 
6081da177e4SLinus Torvalds 	/* Swap the send and the receive. */
609cfb6eeb4SYOSHIFUJI Hideaki 	memset(&rep, 0, sizeof(rep));
610cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.dest   = th->source;
611cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.source = th->dest;
612cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.doff   = sizeof(struct tcphdr) / 4;
613cfb6eeb4SYOSHIFUJI Hideaki 	rep.th.rst    = 1;
6141da177e4SLinus Torvalds 
6151da177e4SLinus Torvalds 	if (th->ack) {
616cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.seq = th->ack_seq;
6171da177e4SLinus Torvalds 	} else {
618cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack = 1;
619cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
6201da177e4SLinus Torvalds 				       skb->len - (th->doff << 2));
6211da177e4SLinus Torvalds 	}
6221da177e4SLinus Torvalds 
6237174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
624cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_base = (unsigned char *)&rep;
625cfb6eeb4SYOSHIFUJI Hideaki 	arg.iov[0].iov_len  = sizeof(rep.th);
626cfb6eeb4SYOSHIFUJI Hideaki 
6270f85feaeSEric Dumazet 	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
628cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
629658ddaafSShawn Lu 	hash_location = tcp_parse_md5sig_option(th);
630658ddaafSShawn Lu 	if (!sk && hash_location) {
631658ddaafSShawn Lu 		/*
632658ddaafSShawn Lu 		 * active side is lost. Try to find listening socket through
633658ddaafSShawn Lu 		 * source port, and then find md5 key through listening socket.
634658ddaafSShawn Lu 		 * we are not loose security here:
635658ddaafSShawn Lu 		 * Incoming packet is checked with md5 hash with finding key,
636658ddaafSShawn Lu 		 * no RST generated if md5 hash doesn't match.
637658ddaafSShawn Lu 		 */
6380f85feaeSEric Dumazet 		sk1 = __inet_lookup_listener(net,
639da5e3630STom Herbert 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
640da5e3630STom Herbert 					     th->source, ip_hdr(skb)->daddr,
641658ddaafSShawn Lu 					     ntohs(th->source), inet_iif(skb));
642658ddaafSShawn Lu 		/* don't send rst if it can't find key */
643658ddaafSShawn Lu 		if (!sk1)
644658ddaafSShawn Lu 			return;
645658ddaafSShawn Lu 		rcu_read_lock();
646658ddaafSShawn Lu 		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
647658ddaafSShawn Lu 					&ip_hdr(skb)->saddr, AF_INET);
648658ddaafSShawn Lu 		if (!key)
649658ddaafSShawn Lu 			goto release_sk1;
650658ddaafSShawn Lu 
65139f8e58eSEric Dumazet 		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
652658ddaafSShawn Lu 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
653658ddaafSShawn Lu 			goto release_sk1;
654658ddaafSShawn Lu 	} else {
655658ddaafSShawn Lu 		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
656658ddaafSShawn Lu 					     &ip_hdr(skb)->saddr,
657a915da9bSEric Dumazet 					     AF_INET) : NULL;
658658ddaafSShawn Lu 	}
659658ddaafSShawn Lu 
660cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
661cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
662cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_NOP << 16) |
663cfb6eeb4SYOSHIFUJI Hideaki 				   (TCPOPT_MD5SIG << 8) |
664cfb6eeb4SYOSHIFUJI Hideaki 				   TCPOLEN_MD5SIG);
665cfb6eeb4SYOSHIFUJI Hideaki 		/* Update length and the length the header thinks exists */
666cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
667cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len / 4;
668cfb6eeb4SYOSHIFUJI Hideaki 
66949a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
67078e645cbSIlpo Järvinen 				     key, ip_hdr(skb)->saddr,
67178e645cbSIlpo Järvinen 				     ip_hdr(skb)->daddr, &rep.th);
672cfb6eeb4SYOSHIFUJI Hideaki 	}
673cfb6eeb4SYOSHIFUJI Hideaki #endif
674eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
675eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
67652cd5750SIlpo Järvinen 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
6771da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
67888ef4a5aSKOVACS Krisztian 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
679e2446eaaSShawn Lu 	/* When socket is gone, all binding information is lost.
6804c675258SAlexey Kuznetsov 	 * routing might fail in this case. No choice here, if we choose to force
6814c675258SAlexey Kuznetsov 	 * input interface, we will misroute in case of asymmetric route.
682e2446eaaSShawn Lu 	 */
6834c675258SAlexey Kuznetsov 	if (sk)
6844c675258SAlexey Kuznetsov 		arg.bound_dev_if = sk->sk_bound_dev_if;
6851da177e4SLinus Torvalds 
68666b13d99SEric Dumazet 	arg.tos = ip_hdr(skb)->tos;
687bdbbb852SEric Dumazet 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
688bdbbb852SEric Dumazet 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
68924a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
69024a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
6911da177e4SLinus Torvalds 
69263231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
69363231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
694658ddaafSShawn Lu 
695658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG
696658ddaafSShawn Lu release_sk1:
697658ddaafSShawn Lu 	if (sk1) {
698658ddaafSShawn Lu 		rcu_read_unlock();
699658ddaafSShawn Lu 		sock_put(sk1);
700658ddaafSShawn Lu 	}
701658ddaafSShawn Lu #endif
7021da177e4SLinus Torvalds }
7031da177e4SLinus Torvalds 
7041da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
7051da177e4SLinus Torvalds    outside socket context is ugly, certainly. What can I do?
7061da177e4SLinus Torvalds  */
7071da177e4SLinus Torvalds 
7089501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
709ee684b6fSAndrey Vagin 			    u32 win, u32 tsval, u32 tsecr, int oif,
71088ef4a5aSKOVACS Krisztian 			    struct tcp_md5sig_key *key,
71166b13d99SEric Dumazet 			    int reply_flags, u8 tos)
7121da177e4SLinus Torvalds {
713cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
7141da177e4SLinus Torvalds 	struct {
7151da177e4SLinus Torvalds 		struct tcphdr th;
716714e85beSAl Viro 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
717cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
718cfb6eeb4SYOSHIFUJI Hideaki 			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
719cfb6eeb4SYOSHIFUJI Hideaki #endif
720cfb6eeb4SYOSHIFUJI Hideaki 			];
7211da177e4SLinus Torvalds 	} rep;
7221da177e4SLinus Torvalds 	struct ip_reply_arg arg;
723adf30907SEric Dumazet 	struct net *net = dev_net(skb_dst(skb)->dev);
7241da177e4SLinus Torvalds 
7251da177e4SLinus Torvalds 	memset(&rep.th, 0, sizeof(struct tcphdr));
7267174259eSArnaldo Carvalho de Melo 	memset(&arg, 0, sizeof(arg));
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds 	arg.iov[0].iov_base = (unsigned char *)&rep;
7291da177e4SLinus Torvalds 	arg.iov[0].iov_len  = sizeof(rep.th);
730ee684b6fSAndrey Vagin 	if (tsecr) {
731cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
7321da177e4SLinus Torvalds 				   (TCPOPT_TIMESTAMP << 8) |
7331da177e4SLinus Torvalds 				   TCPOLEN_TIMESTAMP);
734ee684b6fSAndrey Vagin 		rep.opt[1] = htonl(tsval);
735ee684b6fSAndrey Vagin 		rep.opt[2] = htonl(tsecr);
736cb48cfe8SCraig Schlenter 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
7371da177e4SLinus Torvalds 	}
7381da177e4SLinus Torvalds 
7391da177e4SLinus Torvalds 	/* Swap the send and the receive. */
7401da177e4SLinus Torvalds 	rep.th.dest    = th->source;
7411da177e4SLinus Torvalds 	rep.th.source  = th->dest;
7421da177e4SLinus Torvalds 	rep.th.doff    = arg.iov[0].iov_len / 4;
7431da177e4SLinus Torvalds 	rep.th.seq     = htonl(seq);
7441da177e4SLinus Torvalds 	rep.th.ack_seq = htonl(ack);
7451da177e4SLinus Torvalds 	rep.th.ack     = 1;
7461da177e4SLinus Torvalds 	rep.th.window  = htons(win);
7471da177e4SLinus Torvalds 
748cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
749cfb6eeb4SYOSHIFUJI Hideaki 	if (key) {
750ee684b6fSAndrey Vagin 		int offset = (tsecr) ? 3 : 0;
751cfb6eeb4SYOSHIFUJI Hideaki 
752cfb6eeb4SYOSHIFUJI Hideaki 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
753cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_NOP << 16) |
754cfb6eeb4SYOSHIFUJI Hideaki 					  (TCPOPT_MD5SIG << 8) |
755cfb6eeb4SYOSHIFUJI Hideaki 					  TCPOLEN_MD5SIG);
756cfb6eeb4SYOSHIFUJI Hideaki 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
757cfb6eeb4SYOSHIFUJI Hideaki 		rep.th.doff = arg.iov[0].iov_len/4;
758cfb6eeb4SYOSHIFUJI Hideaki 
75949a72dfbSAdam Langley 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
76090b7e112SAdam Langley 				    key, ip_hdr(skb)->saddr,
76190b7e112SAdam Langley 				    ip_hdr(skb)->daddr, &rep.th);
762cfb6eeb4SYOSHIFUJI Hideaki 	}
763cfb6eeb4SYOSHIFUJI Hideaki #endif
76488ef4a5aSKOVACS Krisztian 	arg.flags = reply_flags;
765eddc9ec5SArnaldo Carvalho de Melo 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
766eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->saddr, /* XXX */
7671da177e4SLinus Torvalds 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
7681da177e4SLinus Torvalds 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
7699501f972SYOSHIFUJI Hideaki 	if (oif)
7709501f972SYOSHIFUJI Hideaki 		arg.bound_dev_if = oif;
77166b13d99SEric Dumazet 	arg.tos = tos;
772bdbbb852SEric Dumazet 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
773bdbbb852SEric Dumazet 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
77424a2d43dSEric Dumazet 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
77524a2d43dSEric Dumazet 			      &arg, arg.iov[0].iov_len);
7761da177e4SLinus Torvalds 
77763231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
7781da177e4SLinus Torvalds }
7791da177e4SLinus Torvalds 
7801da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
7811da177e4SLinus Torvalds {
7828feaf0c0SArnaldo Carvalho de Melo 	struct inet_timewait_sock *tw = inet_twsk(sk);
783cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
7841da177e4SLinus Torvalds 
7859501f972SYOSHIFUJI Hideaki 	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7867174259eSArnaldo Carvalho de Melo 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
787ee684b6fSAndrey Vagin 			tcp_time_stamp + tcptw->tw_ts_offset,
7889501f972SYOSHIFUJI Hideaki 			tcptw->tw_ts_recent,
7899501f972SYOSHIFUJI Hideaki 			tw->tw_bound_dev_if,
79088ef4a5aSKOVACS Krisztian 			tcp_twsk_md5_key(tcptw),
79166b13d99SEric Dumazet 			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
79266b13d99SEric Dumazet 			tw->tw_tos
7939501f972SYOSHIFUJI Hideaki 			);
7941da177e4SLinus Torvalds 
7958feaf0c0SArnaldo Carvalho de Melo 	inet_twsk_put(tw);
7961da177e4SLinus Torvalds }
7971da177e4SLinus Torvalds 
7986edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7997174259eSArnaldo Carvalho de Melo 				  struct request_sock *req)
8001da177e4SLinus Torvalds {
801168a8f58SJerry Chu 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
802168a8f58SJerry Chu 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
803168a8f58SJerry Chu 	 */
804168a8f58SJerry Chu 	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
805168a8f58SJerry Chu 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
806168a8f58SJerry Chu 			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
807ee684b6fSAndrey Vagin 			tcp_time_stamp,
8089501f972SYOSHIFUJI Hideaki 			req->ts_recent,
8099501f972SYOSHIFUJI Hideaki 			0,
810a915da9bSEric Dumazet 			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
811a915da9bSEric Dumazet 					  AF_INET),
81266b13d99SEric Dumazet 			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
81366b13d99SEric Dumazet 			ip_hdr(skb)->tos);
8141da177e4SLinus Torvalds }
8151da177e4SLinus Torvalds 
8161da177e4SLinus Torvalds /*
8179bf1d83eSKris Katterjohn  *	Send a SYN-ACK after having received a SYN.
81860236fddSArnaldo Carvalho de Melo  *	This still operates on a request_sock only, not on a big
8191da177e4SLinus Torvalds  *	socket.
8201da177e4SLinus Torvalds  */
82172659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
822d6274bd8SOctavian Purdila 			      struct flowi *fl,
823e6b4d113SWilliam Allen Simpson 			      struct request_sock *req,
824843f4a55SYuchung Cheng 			      u16 queue_mapping,
825843f4a55SYuchung Cheng 			      struct tcp_fastopen_cookie *foc)
8261da177e4SLinus Torvalds {
8272e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
8286bd023f3SDavid S. Miller 	struct flowi4 fl4;
8291da177e4SLinus Torvalds 	int err = -1;
8301da177e4SLinus Torvalds 	struct sk_buff *skb;
8311da177e4SLinus Torvalds 
8321da177e4SLinus Torvalds 	/* First, grab a route. */
833ba3f7f04SDavid S. Miller 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
834fd80eb94SDenis V. Lunev 		return -1;
8351da177e4SLinus Torvalds 
836843f4a55SYuchung Cheng 	skb = tcp_make_synack(sk, dst, req, foc);
8371da177e4SLinus Torvalds 
8381da177e4SLinus Torvalds 	if (skb) {
839634fb979SEric Dumazet 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
8401da177e4SLinus Torvalds 
841fff32699SEric Dumazet 		skb_set_queue_mapping(skb, queue_mapping);
842634fb979SEric Dumazet 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
843634fb979SEric Dumazet 					    ireq->ir_rmt_addr,
8442e6599cbSArnaldo Carvalho de Melo 					    ireq->opt);
845b9df3cb8SGerrit Renker 		err = net_xmit_eval(err);
8461da177e4SLinus Torvalds 	}
8471da177e4SLinus Torvalds 
8481da177e4SLinus Torvalds 	return err;
8491da177e4SLinus Torvalds }
8501da177e4SLinus Torvalds 
8511da177e4SLinus Torvalds /*
85260236fddSArnaldo Carvalho de Melo  *	IPv4 request_sock destructor.
8531da177e4SLinus Torvalds  */
85460236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req)
8551da177e4SLinus Torvalds {
8562e6599cbSArnaldo Carvalho de Melo 	kfree(inet_rsk(req)->opt);
8571da177e4SLinus Torvalds }
8581da177e4SLinus Torvalds 
8591da177e4SLinus Torvalds 
860cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
861cfb6eeb4SYOSHIFUJI Hideaki /*
862cfb6eeb4SYOSHIFUJI Hideaki  * RFC2385 MD5 checksumming requires a mapping of
863cfb6eeb4SYOSHIFUJI Hideaki  * IP address->MD5 Key.
864cfb6eeb4SYOSHIFUJI Hideaki  * We need to maintain these in the sk structure.
865cfb6eeb4SYOSHIFUJI Hideaki  */
866cfb6eeb4SYOSHIFUJI Hideaki 
867cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address.  */
868a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
869a915da9bSEric Dumazet 					 const union tcp_md5_addr *addr,
870a915da9bSEric Dumazet 					 int family)
871cfb6eeb4SYOSHIFUJI Hideaki {
872fd3a154aSEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
873a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
874a915da9bSEric Dumazet 	unsigned int size = sizeof(struct in_addr);
875fd3a154aSEric Dumazet 	const struct tcp_md5sig_info *md5sig;
876cfb6eeb4SYOSHIFUJI Hideaki 
877a8afca03SEric Dumazet 	/* caller either holds rcu_read_lock() or socket lock */
878a8afca03SEric Dumazet 	md5sig = rcu_dereference_check(tp->md5sig_info,
879b4fb05eaSEric Dumazet 				       sock_owned_by_user(sk) ||
880b4fb05eaSEric Dumazet 				       lockdep_is_held(&sk->sk_lock.slock));
881a8afca03SEric Dumazet 	if (!md5sig)
882cfb6eeb4SYOSHIFUJI Hideaki 		return NULL;
883a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
884a915da9bSEric Dumazet 	if (family == AF_INET6)
885a915da9bSEric Dumazet 		size = sizeof(struct in6_addr);
886a915da9bSEric Dumazet #endif
887b67bfe0dSSasha Levin 	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
888a915da9bSEric Dumazet 		if (key->family != family)
889a915da9bSEric Dumazet 			continue;
890a915da9bSEric Dumazet 		if (!memcmp(&key->addr, addr, size))
891a915da9bSEric Dumazet 			return key;
892cfb6eeb4SYOSHIFUJI Hideaki 	}
893cfb6eeb4SYOSHIFUJI Hideaki 	return NULL;
894cfb6eeb4SYOSHIFUJI Hideaki }
895a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup);
896cfb6eeb4SYOSHIFUJI Hideaki 
897cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
898fd3a154aSEric Dumazet 					 const struct sock *addr_sk)
899cfb6eeb4SYOSHIFUJI Hideaki {
900b52e6921SEric Dumazet 	const union tcp_md5_addr *addr;
901a915da9bSEric Dumazet 
902b52e6921SEric Dumazet 	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
903a915da9bSEric Dumazet 	return tcp_md5_do_lookup(sk, addr, AF_INET);
904cfb6eeb4SYOSHIFUJI Hideaki }
905cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup);
906cfb6eeb4SYOSHIFUJI Hideaki 
907cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */
908a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
909a915da9bSEric Dumazet 		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
910cfb6eeb4SYOSHIFUJI Hideaki {
911cfb6eeb4SYOSHIFUJI Hideaki 	/* Add Key to the list */
912b0a713e9SMatthias M. Dellweg 	struct tcp_md5sig_key *key;
913cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
914f6685938SArnaldo Carvalho de Melo 	struct tcp_md5sig_info *md5sig;
915f6685938SArnaldo Carvalho de Melo 
916c0353c7bSAydin Arik 	key = tcp_md5_do_lookup(sk, addr, family);
917a915da9bSEric Dumazet 	if (key) {
918a915da9bSEric Dumazet 		/* Pre-existing entry - just update that one. */
919a915da9bSEric Dumazet 		memcpy(key->key, newkey, newkeylen);
920a915da9bSEric Dumazet 		key->keylen = newkeylen;
921a915da9bSEric Dumazet 		return 0;
922cfb6eeb4SYOSHIFUJI Hideaki 	}
923260fcbebSYan, Zheng 
924a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info,
925a8afca03SEric Dumazet 					   sock_owned_by_user(sk));
926a915da9bSEric Dumazet 	if (!md5sig) {
927a915da9bSEric Dumazet 		md5sig = kmalloc(sizeof(*md5sig), gfp);
928a915da9bSEric Dumazet 		if (!md5sig)
929a915da9bSEric Dumazet 			return -ENOMEM;
930a915da9bSEric Dumazet 
931a915da9bSEric Dumazet 		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
932a915da9bSEric Dumazet 		INIT_HLIST_HEAD(&md5sig->head);
933a8afca03SEric Dumazet 		rcu_assign_pointer(tp->md5sig_info, md5sig);
934a915da9bSEric Dumazet 	}
935a915da9bSEric Dumazet 
9365f3d9cb2SEric Dumazet 	key = sock_kmalloc(sk, sizeof(*key), gfp);
937a915da9bSEric Dumazet 	if (!key)
938a915da9bSEric Dumazet 		return -ENOMEM;
93971cea17eSEric Dumazet 	if (!tcp_alloc_md5sig_pool()) {
9405f3d9cb2SEric Dumazet 		sock_kfree_s(sk, key, sizeof(*key));
941cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOMEM;
942cfb6eeb4SYOSHIFUJI Hideaki 	}
943f6685938SArnaldo Carvalho de Melo 
944a915da9bSEric Dumazet 	memcpy(key->key, newkey, newkeylen);
945a915da9bSEric Dumazet 	key->keylen = newkeylen;
946a915da9bSEric Dumazet 	key->family = family;
947a915da9bSEric Dumazet 	memcpy(&key->addr, addr,
948a915da9bSEric Dumazet 	       (family == AF_INET6) ? sizeof(struct in6_addr) :
949a915da9bSEric Dumazet 				      sizeof(struct in_addr));
950a915da9bSEric Dumazet 	hlist_add_head_rcu(&key->node, &md5sig->head);
951cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
952cfb6eeb4SYOSHIFUJI Hideaki }
953a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add);
954cfb6eeb4SYOSHIFUJI Hideaki 
955a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
956cfb6eeb4SYOSHIFUJI Hideaki {
957a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
958cfb6eeb4SYOSHIFUJI Hideaki 
959c0353c7bSAydin Arik 	key = tcp_md5_do_lookup(sk, addr, family);
960a915da9bSEric Dumazet 	if (!key)
961cfb6eeb4SYOSHIFUJI Hideaki 		return -ENOENT;
962a915da9bSEric Dumazet 	hlist_del_rcu(&key->node);
9635f3d9cb2SEric Dumazet 	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
964a915da9bSEric Dumazet 	kfree_rcu(key, rcu);
965a915da9bSEric Dumazet 	return 0;
966cfb6eeb4SYOSHIFUJI Hideaki }
967a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del);
968cfb6eeb4SYOSHIFUJI Hideaki 
969e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk)
970cfb6eeb4SYOSHIFUJI Hideaki {
971cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_sock *tp = tcp_sk(sk);
972a915da9bSEric Dumazet 	struct tcp_md5sig_key *key;
973b67bfe0dSSasha Levin 	struct hlist_node *n;
974a8afca03SEric Dumazet 	struct tcp_md5sig_info *md5sig;
975cfb6eeb4SYOSHIFUJI Hideaki 
976a8afca03SEric Dumazet 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
977a8afca03SEric Dumazet 
978b67bfe0dSSasha Levin 	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
979a915da9bSEric Dumazet 		hlist_del_rcu(&key->node);
9805f3d9cb2SEric Dumazet 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
981a915da9bSEric Dumazet 		kfree_rcu(key, rcu);
982cfb6eeb4SYOSHIFUJI Hideaki 	}
983cfb6eeb4SYOSHIFUJI Hideaki }
984cfb6eeb4SYOSHIFUJI Hideaki 
985cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
986cfb6eeb4SYOSHIFUJI Hideaki 				 int optlen)
987cfb6eeb4SYOSHIFUJI Hideaki {
988cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig cmd;
989cfb6eeb4SYOSHIFUJI Hideaki 	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
990cfb6eeb4SYOSHIFUJI Hideaki 
991cfb6eeb4SYOSHIFUJI Hideaki 	if (optlen < sizeof(cmd))
992cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
993cfb6eeb4SYOSHIFUJI Hideaki 
994cfb6eeb4SYOSHIFUJI Hideaki 	if (copy_from_user(&cmd, optval, sizeof(cmd)))
995cfb6eeb4SYOSHIFUJI Hideaki 		return -EFAULT;
996cfb6eeb4SYOSHIFUJI Hideaki 
997cfb6eeb4SYOSHIFUJI Hideaki 	if (sin->sin_family != AF_INET)
998cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
999cfb6eeb4SYOSHIFUJI Hideaki 
100064a124edSDmitry Popov 	if (!cmd.tcpm_keylen)
1001a915da9bSEric Dumazet 		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1002a915da9bSEric Dumazet 				      AF_INET);
1003cfb6eeb4SYOSHIFUJI Hideaki 
1004cfb6eeb4SYOSHIFUJI Hideaki 	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1005cfb6eeb4SYOSHIFUJI Hideaki 		return -EINVAL;
1006cfb6eeb4SYOSHIFUJI Hideaki 
1007a915da9bSEric Dumazet 	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1008a915da9bSEric Dumazet 			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1009a915da9bSEric Dumazet 			      GFP_KERNEL);
1010cfb6eeb4SYOSHIFUJI Hideaki }
1011cfb6eeb4SYOSHIFUJI Hideaki 
101249a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
101349a72dfbSAdam Langley 					__be32 daddr, __be32 saddr, int nbytes)
1014cfb6eeb4SYOSHIFUJI Hideaki {
1015cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp4_pseudohdr *bp;
101649a72dfbSAdam Langley 	struct scatterlist sg;
1017cfb6eeb4SYOSHIFUJI Hideaki 
1018cfb6eeb4SYOSHIFUJI Hideaki 	bp = &hp->md5_blk.ip4;
1019cfb6eeb4SYOSHIFUJI Hideaki 
1020cfb6eeb4SYOSHIFUJI Hideaki 	/*
102149a72dfbSAdam Langley 	 * 1. the TCP pseudo-header (in the order: source IP address,
1022cfb6eeb4SYOSHIFUJI Hideaki 	 * destination IP address, zero-padded protocol number, and
1023cfb6eeb4SYOSHIFUJI Hideaki 	 * segment length)
1024cfb6eeb4SYOSHIFUJI Hideaki 	 */
1025cfb6eeb4SYOSHIFUJI Hideaki 	bp->saddr = saddr;
1026cfb6eeb4SYOSHIFUJI Hideaki 	bp->daddr = daddr;
1027cfb6eeb4SYOSHIFUJI Hideaki 	bp->pad = 0;
1028076fb722SYOSHIFUJI Hideaki 	bp->protocol = IPPROTO_TCP;
102949a72dfbSAdam Langley 	bp->len = cpu_to_be16(nbytes);
1030c7da57a1SDavid S. Miller 
103149a72dfbSAdam Langley 	sg_init_one(&sg, bp, sizeof(*bp));
103249a72dfbSAdam Langley 	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
103349a72dfbSAdam Langley }
103449a72dfbSAdam Langley 
1035a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1036318cf7aaSEric Dumazet 			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
103749a72dfbSAdam Langley {
103849a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
103949a72dfbSAdam Langley 	struct hash_desc *desc;
104049a72dfbSAdam Langley 
104149a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
104249a72dfbSAdam Langley 	if (!hp)
104349a72dfbSAdam Langley 		goto clear_hash_noput;
104449a72dfbSAdam Langley 	desc = &hp->md5_desc;
104549a72dfbSAdam Langley 
104649a72dfbSAdam Langley 	if (crypto_hash_init(desc))
104749a72dfbSAdam Langley 		goto clear_hash;
104849a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
104949a72dfbSAdam Langley 		goto clear_hash;
105049a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
105149a72dfbSAdam Langley 		goto clear_hash;
105249a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
105349a72dfbSAdam Langley 		goto clear_hash;
105449a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
1055cfb6eeb4SYOSHIFUJI Hideaki 		goto clear_hash;
1056cfb6eeb4SYOSHIFUJI Hideaki 
1057cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1058cfb6eeb4SYOSHIFUJI Hideaki 	return 0;
105949a72dfbSAdam Langley 
1060cfb6eeb4SYOSHIFUJI Hideaki clear_hash:
1061cfb6eeb4SYOSHIFUJI Hideaki 	tcp_put_md5sig_pool();
1062cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput:
1063cfb6eeb4SYOSHIFUJI Hideaki 	memset(md5_hash, 0, 16);
106449a72dfbSAdam Langley 	return 1;
1065cfb6eeb4SYOSHIFUJI Hideaki }
1066cfb6eeb4SYOSHIFUJI Hideaki 
106739f8e58eSEric Dumazet int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
106839f8e58eSEric Dumazet 			const struct sock *sk,
1069318cf7aaSEric Dumazet 			const struct sk_buff *skb)
1070cfb6eeb4SYOSHIFUJI Hideaki {
107149a72dfbSAdam Langley 	struct tcp_md5sig_pool *hp;
107249a72dfbSAdam Langley 	struct hash_desc *desc;
1073318cf7aaSEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1074cfb6eeb4SYOSHIFUJI Hideaki 	__be32 saddr, daddr;
1075cfb6eeb4SYOSHIFUJI Hideaki 
107639f8e58eSEric Dumazet 	if (sk) { /* valid for establish/request sockets */
107739f8e58eSEric Dumazet 		saddr = sk->sk_rcv_saddr;
107839f8e58eSEric Dumazet 		daddr = sk->sk_daddr;
1079cfb6eeb4SYOSHIFUJI Hideaki 	} else {
108049a72dfbSAdam Langley 		const struct iphdr *iph = ip_hdr(skb);
108149a72dfbSAdam Langley 		saddr = iph->saddr;
108249a72dfbSAdam Langley 		daddr = iph->daddr;
1083cfb6eeb4SYOSHIFUJI Hideaki 	}
1084cfb6eeb4SYOSHIFUJI Hideaki 
108549a72dfbSAdam Langley 	hp = tcp_get_md5sig_pool();
108649a72dfbSAdam Langley 	if (!hp)
108749a72dfbSAdam Langley 		goto clear_hash_noput;
108849a72dfbSAdam Langley 	desc = &hp->md5_desc;
108949a72dfbSAdam Langley 
109049a72dfbSAdam Langley 	if (crypto_hash_init(desc))
109149a72dfbSAdam Langley 		goto clear_hash;
109249a72dfbSAdam Langley 
109349a72dfbSAdam Langley 	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
109449a72dfbSAdam Langley 		goto clear_hash;
109549a72dfbSAdam Langley 	if (tcp_md5_hash_header(hp, th))
109649a72dfbSAdam Langley 		goto clear_hash;
109749a72dfbSAdam Langley 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
109849a72dfbSAdam Langley 		goto clear_hash;
109949a72dfbSAdam Langley 	if (tcp_md5_hash_key(hp, key))
110049a72dfbSAdam Langley 		goto clear_hash;
110149a72dfbSAdam Langley 	if (crypto_hash_final(desc, md5_hash))
110249a72dfbSAdam Langley 		goto clear_hash;
110349a72dfbSAdam Langley 
110449a72dfbSAdam Langley 	tcp_put_md5sig_pool();
110549a72dfbSAdam Langley 	return 0;
110649a72dfbSAdam Langley 
110749a72dfbSAdam Langley clear_hash:
110849a72dfbSAdam Langley 	tcp_put_md5sig_pool();
110949a72dfbSAdam Langley clear_hash_noput:
111049a72dfbSAdam Langley 	memset(md5_hash, 0, 16);
111149a72dfbSAdam Langley 	return 1;
111249a72dfbSAdam Langley }
111349a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1114cfb6eeb4SYOSHIFUJI Hideaki 
1115ff74e23fSEric Dumazet /* Called with rcu_read_lock() */
1116ff74e23fSEric Dumazet static bool tcp_v4_inbound_md5_hash(struct sock *sk,
11179ea88a15SDmitry Popov 				    const struct sk_buff *skb)
1118cfb6eeb4SYOSHIFUJI Hideaki {
1119cfb6eeb4SYOSHIFUJI Hideaki 	/*
1120cfb6eeb4SYOSHIFUJI Hideaki 	 * This gets called for each TCP segment that arrives
1121cfb6eeb4SYOSHIFUJI Hideaki 	 * so we want to be efficient.
1122cfb6eeb4SYOSHIFUJI Hideaki 	 * We have 3 drop cases:
1123cfb6eeb4SYOSHIFUJI Hideaki 	 * o No MD5 hash and one expected.
1124cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and we're not expecting one.
1125cfb6eeb4SYOSHIFUJI Hideaki 	 * o MD5 hash and its wrong.
1126cfb6eeb4SYOSHIFUJI Hideaki 	 */
1127cf533ea5SEric Dumazet 	const __u8 *hash_location = NULL;
1128cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *hash_expected;
1129eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
1130cf533ea5SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1131cfb6eeb4SYOSHIFUJI Hideaki 	int genhash;
1132cfb6eeb4SYOSHIFUJI Hideaki 	unsigned char newhash[16];
1133cfb6eeb4SYOSHIFUJI Hideaki 
1134a915da9bSEric Dumazet 	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1135a915da9bSEric Dumazet 					  AF_INET);
11367d5d5525SYOSHIFUJI Hideaki 	hash_location = tcp_parse_md5sig_option(th);
1137cfb6eeb4SYOSHIFUJI Hideaki 
1138cfb6eeb4SYOSHIFUJI Hideaki 	/* We've parsed the options - do we have a hash? */
1139cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && !hash_location)
1140a2a385d6SEric Dumazet 		return false;
1141cfb6eeb4SYOSHIFUJI Hideaki 
1142cfb6eeb4SYOSHIFUJI Hideaki 	if (hash_expected && !hash_location) {
1143785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1144a2a385d6SEric Dumazet 		return true;
1145cfb6eeb4SYOSHIFUJI Hideaki 	}
1146cfb6eeb4SYOSHIFUJI Hideaki 
1147cfb6eeb4SYOSHIFUJI Hideaki 	if (!hash_expected && hash_location) {
1148785957d3SDavid S. Miller 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1149a2a385d6SEric Dumazet 		return true;
1150cfb6eeb4SYOSHIFUJI Hideaki 	}
1151cfb6eeb4SYOSHIFUJI Hideaki 
1152cfb6eeb4SYOSHIFUJI Hideaki 	/* Okay, so this is hash_expected and hash_location -
1153cfb6eeb4SYOSHIFUJI Hideaki 	 * so we need to calculate the checksum.
1154cfb6eeb4SYOSHIFUJI Hideaki 	 */
115549a72dfbSAdam Langley 	genhash = tcp_v4_md5_hash_skb(newhash,
1156cfb6eeb4SYOSHIFUJI Hideaki 				      hash_expected,
115739f8e58eSEric Dumazet 				      NULL, skb);
1158cfb6eeb4SYOSHIFUJI Hideaki 
1159cfb6eeb4SYOSHIFUJI Hideaki 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1160e87cc472SJoe Perches 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1161673d57e7SHarvey Harrison 				     &iph->saddr, ntohs(th->source),
1162673d57e7SHarvey Harrison 				     &iph->daddr, ntohs(th->dest),
1163e87cc472SJoe Perches 				     genhash ? " tcp_v4_calc_md5_hash failed"
1164e87cc472SJoe Perches 				     : "");
1165a2a385d6SEric Dumazet 		return true;
1166cfb6eeb4SYOSHIFUJI Hideaki 	}
1167a2a385d6SEric Dumazet 	return false;
1168cfb6eeb4SYOSHIFUJI Hideaki }
1169cfb6eeb4SYOSHIFUJI Hideaki #endif
1170cfb6eeb4SYOSHIFUJI Hideaki 
117108d2cc3bSEric Dumazet static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
117216bea70aSOctavian Purdila 			    struct sk_buff *skb)
117316bea70aSOctavian Purdila {
117416bea70aSOctavian Purdila 	struct inet_request_sock *ireq = inet_rsk(req);
117516bea70aSOctavian Purdila 
117608d2cc3bSEric Dumazet 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
117708d2cc3bSEric Dumazet 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
117808d2cc3bSEric Dumazet 	ireq->no_srccheck = inet_sk(sk_listener)->transparent;
117916bea70aSOctavian Purdila 	ireq->opt = tcp_v4_save_options(skb);
118016bea70aSOctavian Purdila }
118116bea70aSOctavian Purdila 
1182d94e0417SOctavian Purdila static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1183d94e0417SOctavian Purdila 					  const struct request_sock *req,
1184d94e0417SOctavian Purdila 					  bool *strict)
1185d94e0417SOctavian Purdila {
1186d94e0417SOctavian Purdila 	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1187d94e0417SOctavian Purdila 
1188d94e0417SOctavian Purdila 	if (strict) {
1189d94e0417SOctavian Purdila 		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1190d94e0417SOctavian Purdila 			*strict = true;
1191d94e0417SOctavian Purdila 		else
1192d94e0417SOctavian Purdila 			*strict = false;
1193d94e0417SOctavian Purdila 	}
1194d94e0417SOctavian Purdila 
1195d94e0417SOctavian Purdila 	return dst;
1196d94e0417SOctavian Purdila }
1197d94e0417SOctavian Purdila 
119872a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = {
11991da177e4SLinus Torvalds 	.family		=	PF_INET,
12002e6599cbSArnaldo Carvalho de Melo 	.obj_size	=	sizeof(struct tcp_request_sock),
12015db92c99SOctavian Purdila 	.rtx_syn_ack	=	tcp_rtx_synack,
120260236fddSArnaldo Carvalho de Melo 	.send_ack	=	tcp_v4_reqsk_send_ack,
120360236fddSArnaldo Carvalho de Melo 	.destructor	=	tcp_v4_reqsk_destructor,
12041da177e4SLinus Torvalds 	.send_reset	=	tcp_v4_send_reset,
120572659eccSOctavian Purdila 	.syn_ack_timeout =	tcp_syn_ack_timeout,
12061da177e4SLinus Torvalds };
12071da177e4SLinus Torvalds 
1208b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
12092aec4a29SOctavian Purdila 	.mss_clamp	=	TCP_MSS_DEFAULT,
121016bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG
1211fd3a154aSEric Dumazet 	.req_md5_lookup	=	tcp_v4_md5_lookup,
1212e3afe7b7SJohn Dykstra 	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1213b6332e6cSAndrew Morton #endif
121416bea70aSOctavian Purdila 	.init_req	=	tcp_v4_init_req,
1215fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES
1216fb7b37a7SOctavian Purdila 	.cookie_init_seq =	cookie_v4_init_sequence,
1217fb7b37a7SOctavian Purdila #endif
1218d94e0417SOctavian Purdila 	.route_req	=	tcp_v4_route_req,
1219936b8bdbSOctavian Purdila 	.init_seq	=	tcp_v4_init_sequence,
1220d6274bd8SOctavian Purdila 	.send_synack	=	tcp_v4_send_synack,
1221695da14eSOctavian Purdila 	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
122216bea70aSOctavian Purdila };
1223cfb6eeb4SYOSHIFUJI Hideaki 
12241da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
12251da177e4SLinus Torvalds {
12261da177e4SLinus Torvalds 	/* Never answer to SYNs send to broadcast or multicast */
1227511c3f92SEric Dumazet 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
12281da177e4SLinus Torvalds 		goto drop;
12291da177e4SLinus Torvalds 
12301fb6f159SOctavian Purdila 	return tcp_conn_request(&tcp_request_sock_ops,
12311fb6f159SOctavian Purdila 				&tcp_request_sock_ipv4_ops, sk, skb);
12321da177e4SLinus Torvalds 
12331da177e4SLinus Torvalds drop:
1234848bf15fSVijay Subramanian 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
12351da177e4SLinus Torvalds 	return 0;
12361da177e4SLinus Torvalds }
12374bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request);
12381da177e4SLinus Torvalds 
12391da177e4SLinus Torvalds 
12401da177e4SLinus Torvalds /*
12411da177e4SLinus Torvalds  * The three way handshake has completed - we got a valid synack -
12421da177e4SLinus Torvalds  * now create the new socket.
12431da177e4SLinus Torvalds  */
12441da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
124560236fddSArnaldo Carvalho de Melo 				  struct request_sock *req,
12461da177e4SLinus Torvalds 				  struct dst_entry *dst)
12471da177e4SLinus Torvalds {
12482e6599cbSArnaldo Carvalho de Melo 	struct inet_request_sock *ireq;
12491da177e4SLinus Torvalds 	struct inet_sock *newinet;
12501da177e4SLinus Torvalds 	struct tcp_sock *newtp;
12511da177e4SLinus Torvalds 	struct sock *newsk;
1252cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1253cfb6eeb4SYOSHIFUJI Hideaki 	struct tcp_md5sig_key *key;
1254cfb6eeb4SYOSHIFUJI Hideaki #endif
1255f6d8bd05SEric Dumazet 	struct ip_options_rcu *inet_opt;
12561da177e4SLinus Torvalds 
12571da177e4SLinus Torvalds 	if (sk_acceptq_is_full(sk))
12581da177e4SLinus Torvalds 		goto exit_overflow;
12591da177e4SLinus Torvalds 
12601da177e4SLinus Torvalds 	newsk = tcp_create_openreq_child(sk, req, skb);
12611da177e4SLinus Torvalds 	if (!newsk)
1262093d2823SBalazs Scheidler 		goto exit_nonewsk;
12631da177e4SLinus Torvalds 
1264bcd76111SHerbert Xu 	newsk->sk_gso_type = SKB_GSO_TCPV4;
1265fae6ef87SNeal Cardwell 	inet_sk_rx_dst_set(newsk, skb);
12661da177e4SLinus Torvalds 
12671da177e4SLinus Torvalds 	newtp		      = tcp_sk(newsk);
12681da177e4SLinus Torvalds 	newinet		      = inet_sk(newsk);
12692e6599cbSArnaldo Carvalho de Melo 	ireq		      = inet_rsk(req);
1270d1e559d0SEric Dumazet 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
1271d1e559d0SEric Dumazet 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1272634fb979SEric Dumazet 	newinet->inet_saddr	      = ireq->ir_loc_addr;
1273f6d8bd05SEric Dumazet 	inet_opt	      = ireq->opt;
1274f6d8bd05SEric Dumazet 	rcu_assign_pointer(newinet->inet_opt, inet_opt);
12752e6599cbSArnaldo Carvalho de Melo 	ireq->opt	      = NULL;
1276463c84b9SArnaldo Carvalho de Melo 	newinet->mc_index     = inet_iif(skb);
1277eddc9ec5SArnaldo Carvalho de Melo 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
12784c507d28SJiri Benc 	newinet->rcv_tos      = ip_hdr(skb)->tos;
1279d83d8461SArnaldo Carvalho de Melo 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1280b73c3d0eSTom Herbert 	inet_set_txhash(newsk);
1281f6d8bd05SEric Dumazet 	if (inet_opt)
1282f6d8bd05SEric Dumazet 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1283c720c7e8SEric Dumazet 	newinet->inet_id = newtp->write_seq ^ jiffies;
12841da177e4SLinus Torvalds 
1285dfd25fffSEric Dumazet 	if (!dst) {
1286dfd25fffSEric Dumazet 		dst = inet_csk_route_child_sock(sk, newsk, req);
1287dfd25fffSEric Dumazet 		if (!dst)
12880e734419SDavid S. Miller 			goto put_and_exit;
1289dfd25fffSEric Dumazet 	} else {
1290dfd25fffSEric Dumazet 		/* syncookie case : see end of cookie_v4_check() */
1291dfd25fffSEric Dumazet 	}
12920e734419SDavid S. Miller 	sk_setup_caps(newsk, dst);
12930e734419SDavid S. Miller 
129481164413SDaniel Borkmann 	tcp_ca_openreq_child(newsk, dst);
129581164413SDaniel Borkmann 
12961da177e4SLinus Torvalds 	tcp_sync_mss(newsk, dst_mtu(dst));
12970dbaee3bSDavid S. Miller 	newtp->advmss = dst_metric_advmss(dst);
1298f5fff5dcSTom Quetchenbach 	if (tcp_sk(sk)->rx_opt.user_mss &&
1299f5fff5dcSTom Quetchenbach 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1300f5fff5dcSTom Quetchenbach 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1301f5fff5dcSTom Quetchenbach 
13021da177e4SLinus Torvalds 	tcp_initialize_rcv_mss(newsk);
13031da177e4SLinus Torvalds 
1304cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1305cfb6eeb4SYOSHIFUJI Hideaki 	/* Copy over the MD5 key from the original socket */
1306a915da9bSEric Dumazet 	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1307a915da9bSEric Dumazet 				AF_INET);
130800db4124SIan Morris 	if (key) {
1309cfb6eeb4SYOSHIFUJI Hideaki 		/*
1310cfb6eeb4SYOSHIFUJI Hideaki 		 * We're using one, so create a matching key
1311cfb6eeb4SYOSHIFUJI Hideaki 		 * on the newsk structure. If we fail to get
1312cfb6eeb4SYOSHIFUJI Hideaki 		 * memory, then we end up not copying the key
1313cfb6eeb4SYOSHIFUJI Hideaki 		 * across. Shucks.
1314cfb6eeb4SYOSHIFUJI Hideaki 		 */
1315a915da9bSEric Dumazet 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1316a915da9bSEric Dumazet 			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
1317a465419bSEric Dumazet 		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1318cfb6eeb4SYOSHIFUJI Hideaki 	}
1319cfb6eeb4SYOSHIFUJI Hideaki #endif
1320cfb6eeb4SYOSHIFUJI Hideaki 
13210e734419SDavid S. Miller 	if (__inet_inherit_port(sk, newsk) < 0)
13220e734419SDavid S. Miller 		goto put_and_exit;
13239327f705SEric Dumazet 	__inet_hash_nolisten(newsk, NULL);
13241da177e4SLinus Torvalds 
13251da177e4SLinus Torvalds 	return newsk;
13261da177e4SLinus Torvalds 
13271da177e4SLinus Torvalds exit_overflow:
1328de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1329093d2823SBalazs Scheidler exit_nonewsk:
1330093d2823SBalazs Scheidler 	dst_release(dst);
13311da177e4SLinus Torvalds exit:
1332de0744afSPavel Emelyanov 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
13331da177e4SLinus Torvalds 	return NULL;
13340e734419SDavid S. Miller put_and_exit:
1335e337e24dSChristoph Paasch 	inet_csk_prepare_forced_close(newsk);
1336e337e24dSChristoph Paasch 	tcp_done(newsk);
13370e734419SDavid S. Miller 	goto exit;
13381da177e4SLinus Torvalds }
13394bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
13401da177e4SLinus Torvalds 
13411da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
13421da177e4SLinus Torvalds {
134352452c54SEric Dumazet 	const struct tcphdr *th = tcp_hdr(skb);
1344eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
134552452c54SEric Dumazet 	struct request_sock *req;
13461da177e4SLinus Torvalds 	struct sock *nsk;
134752452c54SEric Dumazet 
134852452c54SEric Dumazet 	req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
1349fa76ce73SEric Dumazet 	if (req) {
1350fa76ce73SEric Dumazet 		nsk = tcp_check_req(sk, skb, req, false);
1351b357a364SEric Dumazet 		if (!nsk)
1352fa76ce73SEric Dumazet 			reqsk_put(req);
1353fa76ce73SEric Dumazet 		return nsk;
1354fa76ce73SEric Dumazet 	}
13551da177e4SLinus Torvalds 
13563b1e0a65SYOSHIFUJI Hideaki 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1357c67499c0SPavel Emelyanov 			th->source, iph->daddr, th->dest, inet_iif(skb));
13581da177e4SLinus Torvalds 
13591da177e4SLinus Torvalds 	if (nsk) {
13601da177e4SLinus Torvalds 		if (nsk->sk_state != TCP_TIME_WAIT) {
13611da177e4SLinus Torvalds 			bh_lock_sock(nsk);
13621da177e4SLinus Torvalds 			return nsk;
13631da177e4SLinus Torvalds 		}
13649469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(nsk));
13651da177e4SLinus Torvalds 		return NULL;
13661da177e4SLinus Torvalds 	}
13671da177e4SLinus Torvalds 
13681da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES
1369af9b4738SFlorian Westphal 	if (!th->syn)
1370461b74c3SCong Wang 		sk = cookie_v4_check(sk, skb);
13711da177e4SLinus Torvalds #endif
13721da177e4SLinus Torvalds 	return sk;
13731da177e4SLinus Torvalds }
13741da177e4SLinus Torvalds 
13751da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get
13761da177e4SLinus Torvalds  * here.
13771da177e4SLinus Torvalds  *
13781da177e4SLinus Torvalds  * We have a potential double-lock case here, so even when
13791da177e4SLinus Torvalds  * doing backlog processing we use the BH locking scheme.
13801da177e4SLinus Torvalds  * This is because we cannot sleep with the original spinlock
13811da177e4SLinus Torvalds  * held.
13821da177e4SLinus Torvalds  */
13831da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
13841da177e4SLinus Torvalds {
1385cfb6eeb4SYOSHIFUJI Hideaki 	struct sock *rsk;
1386cfb6eeb4SYOSHIFUJI Hideaki 
13871da177e4SLinus Torvalds 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
138892101b3bSDavid S. Miller 		struct dst_entry *dst = sk->sk_rx_dst;
1389404e0a8bSEric Dumazet 
1390404e0a8bSEric Dumazet 		sock_rps_save_rxhash(sk, skb);
13913d97379aSEric Dumazet 		sk_mark_napi_id(sk, skb);
1392404e0a8bSEric Dumazet 		if (dst) {
1393505fbcf0SEric Dumazet 			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
139451456b29SIan Morris 			    !dst->ops->check(dst, 0)) {
139592101b3bSDavid S. Miller 				dst_release(dst);
139692101b3bSDavid S. Miller 				sk->sk_rx_dst = NULL;
139792101b3bSDavid S. Miller 			}
139892101b3bSDavid S. Miller 		}
1399c995ae22SVijay Subramanian 		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
14001da177e4SLinus Torvalds 		return 0;
14011da177e4SLinus Torvalds 	}
14021da177e4SLinus Torvalds 
1403ab6a5bb6SArnaldo Carvalho de Melo 	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
14041da177e4SLinus Torvalds 		goto csum_err;
14051da177e4SLinus Torvalds 
14061da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
14071da177e4SLinus Torvalds 		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
14081da177e4SLinus Torvalds 		if (!nsk)
14091da177e4SLinus Torvalds 			goto discard;
14101da177e4SLinus Torvalds 
14111da177e4SLinus Torvalds 		if (nsk != sk) {
1412bdeab991STom Herbert 			sock_rps_save_rxhash(nsk, skb);
14133d97379aSEric Dumazet 			sk_mark_napi_id(sk, skb);
1414cfb6eeb4SYOSHIFUJI Hideaki 			if (tcp_child_process(sk, nsk, skb)) {
1415cfb6eeb4SYOSHIFUJI Hideaki 				rsk = nsk;
14161da177e4SLinus Torvalds 				goto reset;
1417cfb6eeb4SYOSHIFUJI Hideaki 			}
14181da177e4SLinus Torvalds 			return 0;
14191da177e4SLinus Torvalds 		}
1420ca55158cSEric Dumazet 	} else
1421bdeab991STom Herbert 		sock_rps_save_rxhash(sk, skb);
1422ca55158cSEric Dumazet 
1423aa8223c7SArnaldo Carvalho de Melo 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1424cfb6eeb4SYOSHIFUJI Hideaki 		rsk = sk;
14251da177e4SLinus Torvalds 		goto reset;
1426cfb6eeb4SYOSHIFUJI Hideaki 	}
14271da177e4SLinus Torvalds 	return 0;
14281da177e4SLinus Torvalds 
14291da177e4SLinus Torvalds reset:
1430cfb6eeb4SYOSHIFUJI Hideaki 	tcp_v4_send_reset(rsk, skb);
14311da177e4SLinus Torvalds discard:
14321da177e4SLinus Torvalds 	kfree_skb(skb);
14331da177e4SLinus Torvalds 	/* Be careful here. If this function gets more complicated and
14341da177e4SLinus Torvalds 	 * gcc suffers from register pressure on the x86, sk (in %ebx)
14351da177e4SLinus Torvalds 	 * might be destroyed here. This current version compiles correctly,
14361da177e4SLinus Torvalds 	 * but you have been warned.
14371da177e4SLinus Torvalds 	 */
14381da177e4SLinus Torvalds 	return 0;
14391da177e4SLinus Torvalds 
14401da177e4SLinus Torvalds csum_err:
14416a5dc9e5SEric Dumazet 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
144263231bddSPavel Emelyanov 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
14431da177e4SLinus Torvalds 	goto discard;
14441da177e4SLinus Torvalds }
14454bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv);
14461da177e4SLinus Torvalds 
1447160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb)
144841063e9dSDavid S. Miller {
144941063e9dSDavid S. Miller 	const struct iphdr *iph;
145041063e9dSDavid S. Miller 	const struct tcphdr *th;
145141063e9dSDavid S. Miller 	struct sock *sk;
145241063e9dSDavid S. Miller 
145341063e9dSDavid S. Miller 	if (skb->pkt_type != PACKET_HOST)
1454160eb5a6SDavid S. Miller 		return;
145541063e9dSDavid S. Miller 
145645f00f99SEric Dumazet 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1457160eb5a6SDavid S. Miller 		return;
145841063e9dSDavid S. Miller 
145941063e9dSDavid S. Miller 	iph = ip_hdr(skb);
146045f00f99SEric Dumazet 	th = tcp_hdr(skb);
146141063e9dSDavid S. Miller 
146241063e9dSDavid S. Miller 	if (th->doff < sizeof(struct tcphdr) / 4)
1463160eb5a6SDavid S. Miller 		return;
146441063e9dSDavid S. Miller 
146545f00f99SEric Dumazet 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
146641063e9dSDavid S. Miller 				       iph->saddr, th->source,
14677011d085SVijay Subramanian 				       iph->daddr, ntohs(th->dest),
14689cb429d6SEric Dumazet 				       skb->skb_iif);
146941063e9dSDavid S. Miller 	if (sk) {
147041063e9dSDavid S. Miller 		skb->sk = sk;
147141063e9dSDavid S. Miller 		skb->destructor = sock_edemux;
1472f7e4eb03SEric Dumazet 		if (sk_fullsock(sk)) {
1473d0c294c5SMichal Kubeček 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1474505fbcf0SEric Dumazet 
147541063e9dSDavid S. Miller 			if (dst)
147641063e9dSDavid S. Miller 				dst = dst_check(dst, 0);
147792101b3bSDavid S. Miller 			if (dst &&
1478505fbcf0SEric Dumazet 			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
147941063e9dSDavid S. Miller 				skb_dst_set_noref(skb, dst);
148041063e9dSDavid S. Miller 		}
148141063e9dSDavid S. Miller 	}
148241063e9dSDavid S. Miller }
148341063e9dSDavid S. Miller 
1484b2fb4f54SEric Dumazet /* Packet is added to VJ-style prequeue for processing in process
1485b2fb4f54SEric Dumazet  * context, if a reader task is waiting. Apparently, this exciting
1486b2fb4f54SEric Dumazet  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1487b2fb4f54SEric Dumazet  * failed somewhere. Latency? Burstiness? Well, at least now we will
1488b2fb4f54SEric Dumazet  * see, why it failed. 8)8)				  --ANK
1489b2fb4f54SEric Dumazet  *
1490b2fb4f54SEric Dumazet  */
1491b2fb4f54SEric Dumazet bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1492b2fb4f54SEric Dumazet {
1493b2fb4f54SEric Dumazet 	struct tcp_sock *tp = tcp_sk(sk);
1494b2fb4f54SEric Dumazet 
1495b2fb4f54SEric Dumazet 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
1496b2fb4f54SEric Dumazet 		return false;
1497b2fb4f54SEric Dumazet 
1498b2fb4f54SEric Dumazet 	if (skb->len <= tcp_hdrlen(skb) &&
1499b2fb4f54SEric Dumazet 	    skb_queue_len(&tp->ucopy.prequeue) == 0)
1500b2fb4f54SEric Dumazet 		return false;
1501b2fb4f54SEric Dumazet 
1502ca777effSEric Dumazet 	/* Before escaping RCU protected region, we need to take care of skb
1503ca777effSEric Dumazet 	 * dst. Prequeue is only enabled for established sockets.
1504ca777effSEric Dumazet 	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
1505ca777effSEric Dumazet 	 * Instead of doing full sk_rx_dst validity here, let's perform
1506ca777effSEric Dumazet 	 * an optimistic check.
1507ca777effSEric Dumazet 	 */
1508ca777effSEric Dumazet 	if (likely(sk->sk_rx_dst))
1509ca777effSEric Dumazet 		skb_dst_drop(skb);
1510ca777effSEric Dumazet 	else
151158717686SDavid S. Miller 		skb_dst_force(skb);
1512ca777effSEric Dumazet 
1513b2fb4f54SEric Dumazet 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
1514b2fb4f54SEric Dumazet 	tp->ucopy.memory += skb->truesize;
1515b2fb4f54SEric Dumazet 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
1516b2fb4f54SEric Dumazet 		struct sk_buff *skb1;
1517b2fb4f54SEric Dumazet 
1518b2fb4f54SEric Dumazet 		BUG_ON(sock_owned_by_user(sk));
1519b2fb4f54SEric Dumazet 
1520b2fb4f54SEric Dumazet 		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1521b2fb4f54SEric Dumazet 			sk_backlog_rcv(sk, skb1);
1522b2fb4f54SEric Dumazet 			NET_INC_STATS_BH(sock_net(sk),
1523b2fb4f54SEric Dumazet 					 LINUX_MIB_TCPPREQUEUEDROPPED);
1524b2fb4f54SEric Dumazet 		}
1525b2fb4f54SEric Dumazet 
1526b2fb4f54SEric Dumazet 		tp->ucopy.memory = 0;
1527b2fb4f54SEric Dumazet 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1528b2fb4f54SEric Dumazet 		wake_up_interruptible_sync_poll(sk_sleep(sk),
1529b2fb4f54SEric Dumazet 					   POLLIN | POLLRDNORM | POLLRDBAND);
1530b2fb4f54SEric Dumazet 		if (!inet_csk_ack_scheduled(sk))
1531b2fb4f54SEric Dumazet 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1532b2fb4f54SEric Dumazet 						  (3 * tcp_rto_min(sk)) / 4,
1533b2fb4f54SEric Dumazet 						  TCP_RTO_MAX);
1534b2fb4f54SEric Dumazet 	}
1535b2fb4f54SEric Dumazet 	return true;
1536b2fb4f54SEric Dumazet }
1537b2fb4f54SEric Dumazet EXPORT_SYMBOL(tcp_prequeue);
1538b2fb4f54SEric Dumazet 
15391da177e4SLinus Torvalds /*
15401da177e4SLinus Torvalds  *	From tcp_input.c
15411da177e4SLinus Torvalds  */
15421da177e4SLinus Torvalds 
/* Receive one IPv4 TCP segment: validate the header, fill in TCP_SKB_CB(),
 * look up the owning socket and hand the segment to it.
 *
 * Returns 0 when the packet is dropped, prequeued or put on the backlog;
 * otherwise the value returned by tcp_v4_do_rcv().
 */
15431da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb)
15441da177e4SLinus Torvalds {
1545eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph;
1546cf533ea5SEric Dumazet 	const struct tcphdr *th;
15471da177e4SLinus Torvalds 	struct sock *sk;
15481da177e4SLinus Torvalds 	int ret;
1549a86b1e30SPavel Emelyanov 	struct net *net = dev_net(skb->dev);
15501da177e4SLinus Torvalds 
15511da177e4SLinus Torvalds 	if (skb->pkt_type != PACKET_HOST)
15521da177e4SLinus Torvalds 		goto discard_it;
15531da177e4SLinus Torvalds 
15541da177e4SLinus Torvalds 	/* Count it even if it's bad */
155563231bddSPavel Emelyanov 	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
15561da177e4SLinus Torvalds 
15571da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
15581da177e4SLinus Torvalds 		goto discard_it;
15591da177e4SLinus Torvalds 
1560aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
15611da177e4SLinus Torvalds 
	/* doff counts 32-bit words; reject headers shorter than the minimum,
	 * then make sure the full header including options can be pulled.
	 */
15621da177e4SLinus Torvalds 	if (th->doff < sizeof(struct tcphdr) / 4)
15631da177e4SLinus Torvalds 		goto bad_packet;
15641da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, th->doff * 4))
15651da177e4SLinus Torvalds 		goto discard_it;
15661da177e4SLinus Torvalds 
15671da177e4SLinus Torvalds 	/* An explanation is required here, I think.
15681da177e4SLinus Torvalds 	 * Packet length and doff are validated by header prediction,
1569caa20d9aSStephen Hemminger 	 * provided case of th->doff==0 is eliminated.
15701da177e4SLinus Torvalds 	 * So, we defer the checks. */
1571ed70fcfcSTom Herbert 
1572ed70fcfcSTom Herbert 	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
15736a5dc9e5SEric Dumazet 		goto csum_error;
15741da177e4SLinus Torvalds 
	/* Header pointers are fetched again after the pskb_may_pull() calls. */
1575aa8223c7SArnaldo Carvalho de Melo 	th = tcp_hdr(skb);
1576eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
1577971f10ecSEric Dumazet 	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1578971f10ecSEric Dumazet 	 * barrier() makes sure compiler wont play fool^Waliasing games.
1579971f10ecSEric Dumazet 	 */
1580971f10ecSEric Dumazet 	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1581971f10ecSEric Dumazet 		sizeof(struct inet_skb_parm));
1582971f10ecSEric Dumazet 	barrier();
1583971f10ecSEric Dumazet 
	/* end_seq covers the payload plus one unit each for SYN and FIN. */
15841da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
15851da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
15861da177e4SLinus Torvalds 				    skb->len - th->doff * 4);
15871da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1588e11ecddfSEric Dumazet 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
158904317dafSEric Dumazet 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1590b82d1bb4SEric Dumazet 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
15911da177e4SLinus Torvalds 	TCP_SKB_CB(skb)->sacked	 = 0;
15921da177e4SLinus Torvalds 
15939a1f27c4SArnaldo Carvalho de Melo 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
15941da177e4SLinus Torvalds 	if (!sk)
15951da177e4SLinus Torvalds 		goto no_tcp_socket;
15961da177e4SLinus Torvalds 
	/* Also re-entered from do_time_wait with sk replaced by a listener. */
1597bb134d5dSEric Dumazet process:
1598bb134d5dSEric Dumazet 	if (sk->sk_state == TCP_TIME_WAIT)
1599bb134d5dSEric Dumazet 		goto do_time_wait;
1600bb134d5dSEric Dumazet 
	/* Drop packets whose TTL is below the socket's configured minimum. */
16016cce09f8SEric Dumazet 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
16026cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1603d218d111SStephen Hemminger 		goto discard_and_relse;
16046cce09f8SEric Dumazet 	}
1605d218d111SStephen Hemminger 
16061da177e4SLinus Torvalds 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
16071da177e4SLinus Torvalds 		goto discard_and_relse;
16089ea88a15SDmitry Popov 
16099ea88a15SDmitry Popov #ifdef CONFIG_TCP_MD5SIG
16109ea88a15SDmitry Popov 	/*
16119ea88a15SDmitry Popov 	 * We really want to reject the packet as early as possible
16129ea88a15SDmitry Popov 	 * if:
16139ea88a15SDmitry Popov 	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
16149ea88a15SDmitry Popov 	 *  o There is an MD5 option and we're not expecting one
16159ea88a15SDmitry Popov 	 */
16169ea88a15SDmitry Popov 	if (tcp_v4_inbound_md5_hash(sk, skb))
16179ea88a15SDmitry Popov 		goto discard_and_relse;
16189ea88a15SDmitry Popov #endif
16199ea88a15SDmitry Popov 
1620b59c2701SPatrick McHardy 	nf_reset(skb);
16211da177e4SLinus Torvalds 
1622fda9ef5dSDmitry Mishin 	if (sk_filter(sk, skb))
16231da177e4SLinus Torvalds 		goto discard_and_relse;
16241da177e4SLinus Torvalds 
16252c8c56e1SEric Dumazet 	sk_incoming_cpu_update(sk);
16261da177e4SLinus Torvalds 	skb->dev = NULL;
16271da177e4SLinus Torvalds 
	/* With the socket BH-locked: process the segment now (prequeue or
	 * tcp_v4_do_rcv) if no user context owns the socket, otherwise put
	 * it on the backlog, dropping it when the backlog limit is hit.
	 */
1628c6366184SIngo Molnar 	bh_lock_sock_nested(sk);
1629*2efd055cSMarcelo Ricardo Leitner 	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
16301da177e4SLinus Torvalds 	ret = 0;
16311da177e4SLinus Torvalds 	if (!sock_owned_by_user(sk)) {
16321da177e4SLinus Torvalds 		if (!tcp_prequeue(sk, skb))
16331da177e4SLinus Torvalds 			ret = tcp_v4_do_rcv(sk, skb);
1634da882c1fSEric Dumazet 	} else if (unlikely(sk_add_backlog(sk, skb,
1635da882c1fSEric Dumazet 					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
16366b03a53aSZhu Yi 		bh_unlock_sock(sk);
16376cce09f8SEric Dumazet 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
16386b03a53aSZhu Yi 		goto discard_and_relse;
16396b03a53aSZhu Yi 	}
16401da177e4SLinus Torvalds 	bh_unlock_sock(sk);
16411da177e4SLinus Torvalds 
16421da177e4SLinus Torvalds 	sock_put(sk);
16431da177e4SLinus Torvalds 
16441da177e4SLinus Torvalds 	return ret;
16451da177e4SLinus Torvalds 
	/* No socket matched: count short/corrupt segments as errors, answer
	 * anything else with a reset.  csum_error and bad_packet are entered
	 * from the checks above and fall through to discard_it.
	 */
16461da177e4SLinus Torvalds no_tcp_socket:
16471da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
16481da177e4SLinus Torvalds 		goto discard_it;
16491da177e4SLinus Torvalds 
16501da177e4SLinus Torvalds 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
16516a5dc9e5SEric Dumazet csum_error:
16526a5dc9e5SEric Dumazet 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
16531da177e4SLinus Torvalds bad_packet:
165463231bddSPavel Emelyanov 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
16551da177e4SLinus Torvalds 	} else {
1656cfb6eeb4SYOSHIFUJI Hideaki 		tcp_v4_send_reset(NULL, skb);
16571da177e4SLinus Torvalds 	}
16581da177e4SLinus Torvalds 
16591da177e4SLinus Torvalds discard_it:
16601da177e4SLinus Torvalds 	/* Discard frame. */
16611da177e4SLinus Torvalds 	kfree_skb(skb);
16621da177e4SLinus Torvalds 	return 0;
16631da177e4SLinus Torvalds 
16641da177e4SLinus Torvalds discard_and_relse:
16651da177e4SLinus Torvalds 	sock_put(sk);
16661da177e4SLinus Torvalds 	goto discard_it;
16671da177e4SLinus Torvalds 
	/* sk is a timewait socket here: every exit below that does not pass
	 * it on releases it with inet_twsk_put() rather than sock_put().
	 */
16681da177e4SLinus Torvalds do_time_wait:
16691da177e4SLinus Torvalds 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
16709469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
16711da177e4SLinus Torvalds 		goto discard_it;
16721da177e4SLinus Torvalds 	}
16731da177e4SLinus Torvalds 
16746a5dc9e5SEric Dumazet 	if (skb->len < (th->doff << 2)) {
16759469c7b4SYOSHIFUJI Hideaki 		inet_twsk_put(inet_twsk(sk));
16766a5dc9e5SEric Dumazet 		goto bad_packet;
16776a5dc9e5SEric Dumazet 	}
16786a5dc9e5SEric Dumazet 	if (tcp_checksum_complete(skb)) {
16796a5dc9e5SEric Dumazet 		inet_twsk_put(inet_twsk(sk));
16806a5dc9e5SEric Dumazet 		goto csum_error;
16811da177e4SLinus Torvalds 	}
16829469c7b4SYOSHIFUJI Hideaki 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
16831da177e4SLinus Torvalds 	case TCP_TW_SYN: {
		/* If a listener exists for this SYN, retire the timewait
		 * socket and restart processing on the listener.
		 */
1684c346dca1SYOSHIFUJI Hideaki 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1685c67499c0SPavel Emelyanov 							&tcp_hashinfo,
1686da5e3630STom Herbert 							iph->saddr, th->source,
1687eddc9ec5SArnaldo Carvalho de Melo 							iph->daddr, th->dest,
1688463c84b9SArnaldo Carvalho de Melo 							inet_iif(skb));
16891da177e4SLinus Torvalds 		if (sk2) {
1690789f558cSEric Dumazet 			inet_twsk_deschedule(inet_twsk(sk));
16919469c7b4SYOSHIFUJI Hideaki 			inet_twsk_put(inet_twsk(sk));
16921da177e4SLinus Torvalds 			sk = sk2;
16931da177e4SLinus Torvalds 			goto process;
16941da177e4SLinus Torvalds 		}
16951da177e4SLinus Torvalds 		/* Fall through to ACK */
16961da177e4SLinus Torvalds 	}
16971da177e4SLinus Torvalds 	case TCP_TW_ACK:
16981da177e4SLinus Torvalds 		tcp_v4_timewait_ack(sk, skb);
16991da177e4SLinus Torvalds 		break;
17001da177e4SLinus Torvalds 	case TCP_TW_RST:
17011da177e4SLinus Torvalds 		goto no_tcp_socket;
17021da177e4SLinus Torvalds 	case TCP_TW_SUCCESS:;
17031da177e4SLinus Torvalds 	}
17041da177e4SLinus Torvalds 	goto discard_it;
17051da177e4SLinus Torvalds }
17061da177e4SLinus Torvalds 
1707ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = {
1708ccb7c410SDavid S. Miller 	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1709ccb7c410SDavid S. Miller 	.twsk_unique	= tcp_twsk_unique,
1710ccb7c410SDavid S. Miller 	.twsk_destructor= tcp_twsk_destructor,
1711ccb7c410SDavid S. Miller };
17121da177e4SLinus Torvalds 
171363d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
17145d299f3dSEric Dumazet {
17155d299f3dSEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
17165d299f3dSEric Dumazet 
1717ca777effSEric Dumazet 	if (dst) {
17185d299f3dSEric Dumazet 		dst_hold(dst);
17195d299f3dSEric Dumazet 		sk->sk_rx_dst = dst;
17205d299f3dSEric Dumazet 		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
17215d299f3dSEric Dumazet 	}
1722ca777effSEric Dumazet }
172363d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set);
17245d299f3dSEric Dumazet 
17253b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = {
17261da177e4SLinus Torvalds 	.queue_xmit	   = ip_queue_xmit,
17271da177e4SLinus Torvalds 	.send_check	   = tcp_v4_send_check,
172832519f11SArnaldo Carvalho de Melo 	.rebuild_header	   = inet_sk_rebuild_header,
17295d299f3dSEric Dumazet 	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
17301da177e4SLinus Torvalds 	.conn_request	   = tcp_v4_conn_request,
17311da177e4SLinus Torvalds 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
17321da177e4SLinus Torvalds 	.net_header_len	   = sizeof(struct iphdr),
17331da177e4SLinus Torvalds 	.setsockopt	   = ip_setsockopt,
17341da177e4SLinus Torvalds 	.getsockopt	   = ip_getsockopt,
1735543d9cfeSArnaldo Carvalho de Melo 	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1736543d9cfeSArnaldo Carvalho de Melo 	.sockaddr_len	   = sizeof(struct sockaddr_in),
1737ab1e0a13SArnaldo Carvalho de Melo 	.bind_conflict	   = inet_csk_bind_conflict,
17383fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT
17393fdadf7dSDmitry Mishin 	.compat_setsockopt = compat_ip_setsockopt,
17403fdadf7dSDmitry Mishin 	.compat_getsockopt = compat_ip_getsockopt,
17413fdadf7dSDmitry Mishin #endif
17424fab9071SNeal Cardwell 	.mtu_reduced	   = tcp_v4_mtu_reduced,
17431da177e4SLinus Torvalds };
17444bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific);
17451da177e4SLinus Torvalds 
#ifdef CONFIG_TCP_MD5SIG
/* IPv4 MD5 signature hooks; installed as af_specific by tcp_v4_init_sock(). */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup = tcp_v4_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
	.md5_parse = tcp_v4_parse_md5_keys,
};
#endif
1753cfb6eeb4SYOSHIFUJI Hideaki 
17541da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to
17551da177e4SLinus Torvalds  *       sk_alloc() so need not be done here.
17561da177e4SLinus Torvalds  */
17571da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk)
17581da177e4SLinus Torvalds {
17596687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
17601da177e4SLinus Torvalds 
1761900f65d3SNeal Cardwell 	tcp_init_sock(sk);
17621da177e4SLinus Torvalds 
17638292a17aSArnaldo Carvalho de Melo 	icsk->icsk_af_ops = &ipv4_specific;
1764900f65d3SNeal Cardwell 
1765cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1766ac807fa8SDavid S. Miller 	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1767cfb6eeb4SYOSHIFUJI Hideaki #endif
17681da177e4SLinus Torvalds 
17691da177e4SLinus Torvalds 	return 0;
17701da177e4SLinus Torvalds }
17711da177e4SLinus Torvalds 
17727d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk)
17731da177e4SLinus Torvalds {
17741da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
17751da177e4SLinus Torvalds 
17761da177e4SLinus Torvalds 	tcp_clear_xmit_timers(sk);
17771da177e4SLinus Torvalds 
17786687e988SArnaldo Carvalho de Melo 	tcp_cleanup_congestion_control(sk);
1779317a76f9SStephen Hemminger 
17801da177e4SLinus Torvalds 	/* Cleanup up the write buffer. */
1781fe067e8aSDavid S. Miller 	tcp_write_queue_purge(sk);
17821da177e4SLinus Torvalds 
17831da177e4SLinus Torvalds 	/* Cleans up our, hopefully empty, out_of_order_queue. */
17841da177e4SLinus Torvalds 	__skb_queue_purge(&tp->out_of_order_queue);
17851da177e4SLinus Torvalds 
1786cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG
1787cfb6eeb4SYOSHIFUJI Hideaki 	/* Clean up the MD5 key list, if any */
1788cfb6eeb4SYOSHIFUJI Hideaki 	if (tp->md5sig_info) {
1789a915da9bSEric Dumazet 		tcp_clear_md5_list(sk);
1790a8afca03SEric Dumazet 		kfree_rcu(tp->md5sig_info, rcu);
1791cfb6eeb4SYOSHIFUJI Hideaki 		tp->md5sig_info = NULL;
1792cfb6eeb4SYOSHIFUJI Hideaki 	}
1793cfb6eeb4SYOSHIFUJI Hideaki #endif
1794cfb6eeb4SYOSHIFUJI Hideaki 
17951da177e4SLinus Torvalds 	/* Clean prequeue, it must be empty really */
17961da177e4SLinus Torvalds 	__skb_queue_purge(&tp->ucopy.prequeue);
17971da177e4SLinus Torvalds 
17981da177e4SLinus Torvalds 	/* Clean up a referenced TCP bind bucket. */
1799463c84b9SArnaldo Carvalho de Melo 	if (inet_csk(sk)->icsk_bind_hash)
1800ab1e0a13SArnaldo Carvalho de Melo 		inet_put_port(sk);
18011da177e4SLinus Torvalds 
180200db4124SIan Morris 	BUG_ON(tp->fastopen_rsk);
1803435cf559SWilliam Allen Simpson 
1804cf60af03SYuchung Cheng 	/* If socket is aborted during connect operation */
1805cf60af03SYuchung Cheng 	tcp_free_fastopen_req(tp);
1806cd8ae852SEric Dumazet 	tcp_saved_syn_free(tp);
1807cf60af03SYuchung Cheng 
1808180d8cd9SGlauber Costa 	sk_sockets_allocated_dec(sk);
1809d1a4c0b3SGlauber Costa 	sock_release_memcg(sk);
18101da177e4SLinus Torvalds }
18111da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock);
18121da177e4SLinus Torvalds 
18131da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
18141da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */
18151da177e4SLinus Torvalds 
1816a8b690f9STom Herbert /*
1817a8b690f9STom Herbert  * Get the next listener socket following cur.  If cur is NULL, get the first
1818a8b690f9STom Herbert  * socket starting from the bucket given in st->bucket; when st->bucket is
1819a8b690f9STom Herbert  * zero the very first socket in the hash table is returned.
 *
 * cur is either a listening struct sock or, while st->state is
 * TCP_SEQ_STATE_OPENREQ, a struct request_sock taken from the SYN table of
 * st->syn_wait_sk.  The return value follows the same convention.
 *
 * Locking: a non-NULL return leaves the current listening bucket lock
 * (ilb->lock) held; in TCP_SEQ_STATE_OPENREQ the syn_wait_lock of
 * st->syn_wait_sk is held as well.  tcp_seq_stop() releases them.
 * NULL is returned, with no lock held, once every bucket was walked.
1820a8b690f9STom Herbert  */
18211da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur)
18221da177e4SLinus Torvalds {
1823463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk;
1824c25eb3bfSEric Dumazet 	struct hlist_nulls_node *node;
18251da177e4SLinus Torvalds 	struct sock *sk = cur;
18265caea4eaSEric Dumazet 	struct inet_listen_hashbucket *ilb;
18271da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1828a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
18291da177e4SLinus Torvalds 
	/* Fresh start: lock bucket st->bucket and begin at its head. */
18301da177e4SLinus Torvalds 	if (!sk) {
1831a8b690f9STom Herbert 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
18325caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
1833c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
1834a8b690f9STom Herbert 		st->offset = 0;
18351da177e4SLinus Torvalds 		goto get_sk;
18361da177e4SLinus Torvalds 	}
	/* Continuing: the bucket lock is still held from the previous call. */
18375caea4eaSEric Dumazet 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
18381da177e4SLinus Torvalds 	++st->num;
1839a8b690f9STom Herbert 	++st->offset;
18401da177e4SLinus Torvalds 
18411da177e4SLinus Torvalds 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is a request of st->syn_wait_sk: keep walking its SYN table. */
184260236fddSArnaldo Carvalho de Melo 		struct request_sock *req = cur;
18431da177e4SLinus Torvalds 
1844463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(st->syn_wait_sk);
18451da177e4SLinus Torvalds 		req = req->dl_next;
18461da177e4SLinus Torvalds 		while (1) {
18471da177e4SLinus Torvalds 			while (req) {
1848bdccc4caSDaniel Lezcano 				if (req->rsk_ops->family == st->family) {
18491da177e4SLinus Torvalds 					cur = req;
18501da177e4SLinus Torvalds 					goto out;
18511da177e4SLinus Torvalds 				}
18521da177e4SLinus Torvalds 				req = req->dl_next;
18531da177e4SLinus Torvalds 			}
185472a3effaSEric Dumazet 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
18551da177e4SLinus Torvalds 				break;
			/*
			 * Also entered by goto from start_req below, with icsk
			 * set and syn_wait_lock held by the jumping code.
			 */
18561da177e4SLinus Torvalds get_req:
1857463c84b9SArnaldo Carvalho de Melo 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
18581da177e4SLinus Torvalds 		}
		/* SYN table exhausted: resume with the listener after syn_wait_sk. */
18591bde5ac4SEric Dumazet 		sk	  = sk_nulls_next(st->syn_wait_sk);
18601da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_LISTENING;
1861b2827053SEric Dumazet 		spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
18621da177e4SLinus Torvalds 	} else {
		/* cur is a listener: visit its pending requests, if any, first. */
1863463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
1864b2827053SEric Dumazet 		spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1865463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue))
18661da177e4SLinus Torvalds 			goto start_req;
1867b2827053SEric Dumazet 		spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
18681bde5ac4SEric Dumazet 		sk = sk_nulls_next(sk);
18691da177e4SLinus Torvalds 	}
18701da177e4SLinus Torvalds get_sk:
1871c25eb3bfSEric Dumazet 	sk_nulls_for_each_from(sk, node) {
18728475ef9fSPavel Emelyanov 		if (!net_eq(sock_net(sk), net))
18738475ef9fSPavel Emelyanov 			continue;
18748475ef9fSPavel Emelyanov 		if (sk->sk_family == st->family) {
18751da177e4SLinus Torvalds 			cur = sk;
18761da177e4SLinus Torvalds 			goto out;
18771da177e4SLinus Torvalds 		}
		/*
		 * Listener of another family in this netns: it is not reported
		 * itself, but its request queue is still walked, because
		 * requests are filtered by their own rsk_ops->family.
		 */
1878463c84b9SArnaldo Carvalho de Melo 		icsk = inet_csk(sk);
1879b2827053SEric Dumazet 		spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1880463c84b9SArnaldo Carvalho de Melo 		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
18811da177e4SLinus Torvalds start_req:
18821da177e4SLinus Torvalds 			st->uid		= sock_i_uid(sk);
18831da177e4SLinus Torvalds 			st->syn_wait_sk = sk;
18841da177e4SLinus Torvalds 			st->state	= TCP_SEQ_STATE_OPENREQ;
18851da177e4SLinus Torvalds 			st->sbucket	= 0;
18861da177e4SLinus Torvalds 			goto get_req;
18871da177e4SLinus Torvalds 		}
1888b2827053SEric Dumazet 		spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
18891da177e4SLinus Torvalds 	}
	/* End of this bucket: release it and move on to the next one, if any. */
18905caea4eaSEric Dumazet 	spin_unlock_bh(&ilb->lock);
1891a8b690f9STom Herbert 	st->offset = 0;
18920f7ff927SArnaldo Carvalho de Melo 	if (++st->bucket < INET_LHTABLE_SIZE) {
18935caea4eaSEric Dumazet 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
18945caea4eaSEric Dumazet 		spin_lock_bh(&ilb->lock);
1895c25eb3bfSEric Dumazet 		sk = sk_nulls_head(&ilb->head);
18961da177e4SLinus Torvalds 		goto get_sk;
18971da177e4SLinus Torvalds 	}
18981da177e4SLinus Torvalds 	cur = NULL;
18991da177e4SLinus Torvalds out:
19001da177e4SLinus Torvalds 	return cur;
19011da177e4SLinus Torvalds }
19021da177e4SLinus Torvalds 
19031da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
19041da177e4SLinus Torvalds {
1905a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1906a8b690f9STom Herbert 	void *rc;
1907a8b690f9STom Herbert 
1908a8b690f9STom Herbert 	st->bucket = 0;
1909a8b690f9STom Herbert 	st->offset = 0;
1910a8b690f9STom Herbert 	rc = listening_get_next(seq, NULL);
19111da177e4SLinus Torvalds 
19121da177e4SLinus Torvalds 	while (rc && *pos) {
19131da177e4SLinus Torvalds 		rc = listening_get_next(seq, rc);
19141da177e4SLinus Torvalds 		--*pos;
19151da177e4SLinus Torvalds 	}
19161da177e4SLinus Torvalds 	return rc;
19171da177e4SLinus Torvalds }
19181da177e4SLinus Torvalds 
191905dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st)
19206eac5604SAndi Kleen {
192105dbc7b5SEric Dumazet 	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
19226eac5604SAndi Kleen }
19236eac5604SAndi Kleen 
1924a8b690f9STom Herbert /*
1925a8b690f9STom Herbert  * Get first established socket starting from bucket given in st->bucket.
1926a8b690f9STom Herbert  * If st->bucket is zero, the very first socket in the hash is returned.
1927a8b690f9STom Herbert  */
19281da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq)
19291da177e4SLinus Torvalds {
19301da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1931a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19321da177e4SLinus Torvalds 	void *rc = NULL;
19331da177e4SLinus Torvalds 
1934a8b690f9STom Herbert 	st->offset = 0;
1935a8b690f9STom Herbert 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
19361da177e4SLinus Torvalds 		struct sock *sk;
19373ab5aee7SEric Dumazet 		struct hlist_nulls_node *node;
19389db66bdcSEric Dumazet 		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
19391da177e4SLinus Torvalds 
19406eac5604SAndi Kleen 		/* Lockless fast path for the common case of empty buckets */
19416eac5604SAndi Kleen 		if (empty_bucket(st))
19426eac5604SAndi Kleen 			continue;
19436eac5604SAndi Kleen 
19449db66bdcSEric Dumazet 		spin_lock_bh(lock);
19453ab5aee7SEric Dumazet 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1946f40c8174SDaniel Lezcano 			if (sk->sk_family != st->family ||
1947878628fbSYOSHIFUJI Hideaki 			    !net_eq(sock_net(sk), net)) {
19481da177e4SLinus Torvalds 				continue;
19491da177e4SLinus Torvalds 			}
19501da177e4SLinus Torvalds 			rc = sk;
19511da177e4SLinus Torvalds 			goto out;
19521da177e4SLinus Torvalds 		}
19539db66bdcSEric Dumazet 		spin_unlock_bh(lock);
19541da177e4SLinus Torvalds 	}
19551da177e4SLinus Torvalds out:
19561da177e4SLinus Torvalds 	return rc;
19571da177e4SLinus Torvalds }
19581da177e4SLinus Torvalds 
19591da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur)
19601da177e4SLinus Torvalds {
19611da177e4SLinus Torvalds 	struct sock *sk = cur;
19623ab5aee7SEric Dumazet 	struct hlist_nulls_node *node;
19631da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
1964a4146b1bSDenis V. Lunev 	struct net *net = seq_file_net(seq);
19651da177e4SLinus Torvalds 
19661da177e4SLinus Torvalds 	++st->num;
1967a8b690f9STom Herbert 	++st->offset;
19681da177e4SLinus Torvalds 
19693ab5aee7SEric Dumazet 	sk = sk_nulls_next(sk);
19701da177e4SLinus Torvalds 
19713ab5aee7SEric Dumazet 	sk_nulls_for_each_from(sk, node) {
1972878628fbSYOSHIFUJI Hideaki 		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
197305dbc7b5SEric Dumazet 			return sk;
19741da177e4SLinus Torvalds 	}
19751da177e4SLinus Torvalds 
197605dbc7b5SEric Dumazet 	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
197705dbc7b5SEric Dumazet 	++st->bucket;
197805dbc7b5SEric Dumazet 	return established_get_first(seq);
19791da177e4SLinus Torvalds }
19801da177e4SLinus Torvalds 
19811da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos)
19821da177e4SLinus Torvalds {
1983a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
1984a8b690f9STom Herbert 	void *rc;
1985a8b690f9STom Herbert 
1986a8b690f9STom Herbert 	st->bucket = 0;
1987a8b690f9STom Herbert 	rc = established_get_first(seq);
19881da177e4SLinus Torvalds 
19891da177e4SLinus Torvalds 	while (rc && pos) {
19901da177e4SLinus Torvalds 		rc = established_get_next(seq, rc);
19911da177e4SLinus Torvalds 		--pos;
19921da177e4SLinus Torvalds 	}
19931da177e4SLinus Torvalds 	return rc;
19941da177e4SLinus Torvalds }
19951da177e4SLinus Torvalds 
19961da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
19971da177e4SLinus Torvalds {
19981da177e4SLinus Torvalds 	void *rc;
19991da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
20001da177e4SLinus Torvalds 
20011da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20021da177e4SLinus Torvalds 	rc	  = listening_get_idx(seq, &pos);
20031da177e4SLinus Torvalds 
20041da177e4SLinus Torvalds 	if (!rc) {
20051da177e4SLinus Torvalds 		st->state = TCP_SEQ_STATE_ESTABLISHED;
20061da177e4SLinus Torvalds 		rc	  = established_get_idx(seq, pos);
20071da177e4SLinus Torvalds 	}
20081da177e4SLinus Torvalds 
20091da177e4SLinus Torvalds 	return rc;
20101da177e4SLinus Torvalds }
20111da177e4SLinus Torvalds 
2012a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq)
2013a8b690f9STom Herbert {
2014a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
2015a8b690f9STom Herbert 	int offset = st->offset;
2016a8b690f9STom Herbert 	int orig_num = st->num;
2017a8b690f9STom Herbert 	void *rc = NULL;
2018a8b690f9STom Herbert 
2019a8b690f9STom Herbert 	switch (st->state) {
2020a8b690f9STom Herbert 	case TCP_SEQ_STATE_OPENREQ:
2021a8b690f9STom Herbert 	case TCP_SEQ_STATE_LISTENING:
2022a8b690f9STom Herbert 		if (st->bucket >= INET_LHTABLE_SIZE)
2023a8b690f9STom Herbert 			break;
2024a8b690f9STom Herbert 		st->state = TCP_SEQ_STATE_LISTENING;
2025a8b690f9STom Herbert 		rc = listening_get_next(seq, NULL);
2026a8b690f9STom Herbert 		while (offset-- && rc)
2027a8b690f9STom Herbert 			rc = listening_get_next(seq, rc);
2028a8b690f9STom Herbert 		if (rc)
2029a8b690f9STom Herbert 			break;
2030a8b690f9STom Herbert 		st->bucket = 0;
203105dbc7b5SEric Dumazet 		st->state = TCP_SEQ_STATE_ESTABLISHED;
2032a8b690f9STom Herbert 		/* Fallthrough */
2033a8b690f9STom Herbert 	case TCP_SEQ_STATE_ESTABLISHED:
2034a8b690f9STom Herbert 		if (st->bucket > tcp_hashinfo.ehash_mask)
2035a8b690f9STom Herbert 			break;
2036a8b690f9STom Herbert 		rc = established_get_first(seq);
2037a8b690f9STom Herbert 		while (offset-- && rc)
2038a8b690f9STom Herbert 			rc = established_get_next(seq, rc);
2039a8b690f9STom Herbert 	}
2040a8b690f9STom Herbert 
2041a8b690f9STom Herbert 	st->num = orig_num;
2042a8b690f9STom Herbert 
2043a8b690f9STom Herbert 	return rc;
2044a8b690f9STom Herbert }
2045a8b690f9STom Herbert 
20461da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
20471da177e4SLinus Torvalds {
20481da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
2049a8b690f9STom Herbert 	void *rc;
2050a8b690f9STom Herbert 
2051a8b690f9STom Herbert 	if (*pos && *pos == st->last_pos) {
2052a8b690f9STom Herbert 		rc = tcp_seek_last_pos(seq);
2053a8b690f9STom Herbert 		if (rc)
2054a8b690f9STom Herbert 			goto out;
2055a8b690f9STom Herbert 	}
2056a8b690f9STom Herbert 
20571da177e4SLinus Torvalds 	st->state = TCP_SEQ_STATE_LISTENING;
20581da177e4SLinus Torvalds 	st->num = 0;
2059a8b690f9STom Herbert 	st->bucket = 0;
2060a8b690f9STom Herbert 	st->offset = 0;
2061a8b690f9STom Herbert 	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2062a8b690f9STom Herbert 
2063a8b690f9STom Herbert out:
2064a8b690f9STom Herbert 	st->last_pos = *pos;
2065a8b690f9STom Herbert 	return rc;
20661da177e4SLinus Torvalds }
20671da177e4SLinus Torvalds 
20681da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
20691da177e4SLinus Torvalds {
2070a8b690f9STom Herbert 	struct tcp_iter_state *st = seq->private;
20711da177e4SLinus Torvalds 	void *rc = NULL;
20721da177e4SLinus Torvalds 
20731da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
20741da177e4SLinus Torvalds 		rc = tcp_get_idx(seq, 0);
20751da177e4SLinus Torvalds 		goto out;
20761da177e4SLinus Torvalds 	}
20771da177e4SLinus Torvalds 
20781da177e4SLinus Torvalds 	switch (st->state) {
20791da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
20801da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
20811da177e4SLinus Torvalds 		rc = listening_get_next(seq, v);
20821da177e4SLinus Torvalds 		if (!rc) {
20831da177e4SLinus Torvalds 			st->state = TCP_SEQ_STATE_ESTABLISHED;
2084a8b690f9STom Herbert 			st->bucket = 0;
2085a8b690f9STom Herbert 			st->offset = 0;
20861da177e4SLinus Torvalds 			rc	  = established_get_first(seq);
20871da177e4SLinus Torvalds 		}
20881da177e4SLinus Torvalds 		break;
20891da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
20901da177e4SLinus Torvalds 		rc = established_get_next(seq, v);
20911da177e4SLinus Torvalds 		break;
20921da177e4SLinus Torvalds 	}
20931da177e4SLinus Torvalds out:
20941da177e4SLinus Torvalds 	++*pos;
2095a8b690f9STom Herbert 	st->last_pos = *pos;
20961da177e4SLinus Torvalds 	return rc;
20971da177e4SLinus Torvalds }
20981da177e4SLinus Torvalds 
20991da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v)
21001da177e4SLinus Torvalds {
21011da177e4SLinus Torvalds 	struct tcp_iter_state *st = seq->private;
21021da177e4SLinus Torvalds 
21031da177e4SLinus Torvalds 	switch (st->state) {
21041da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
21051da177e4SLinus Torvalds 		if (v) {
2106463c84b9SArnaldo Carvalho de Melo 			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2107b2827053SEric Dumazet 			spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
21081da177e4SLinus Torvalds 		}
21091da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
21101da177e4SLinus Torvalds 		if (v != SEQ_START_TOKEN)
21115caea4eaSEric Dumazet 			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
21121da177e4SLinus Torvalds 		break;
21131da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
21141da177e4SLinus Torvalds 		if (v)
21159db66bdcSEric Dumazet 			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
21161da177e4SLinus Torvalds 		break;
21171da177e4SLinus Torvalds 	}
21181da177e4SLinus Torvalds }
21191da177e4SLinus Torvalds 
212073cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file)
21211da177e4SLinus Torvalds {
2122d9dda78bSAl Viro 	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
21231da177e4SLinus Torvalds 	struct tcp_iter_state *s;
212452d6f3f1SDenis V. Lunev 	int err;
21251da177e4SLinus Torvalds 
212652d6f3f1SDenis V. Lunev 	err = seq_open_net(inode, file, &afinfo->seq_ops,
212752d6f3f1SDenis V. Lunev 			  sizeof(struct tcp_iter_state));
212852d6f3f1SDenis V. Lunev 	if (err < 0)
212952d6f3f1SDenis V. Lunev 		return err;
2130f40c8174SDaniel Lezcano 
213152d6f3f1SDenis V. Lunev 	s = ((struct seq_file *)file->private_data)->private;
21321da177e4SLinus Torvalds 	s->family		= afinfo->family;
2133a8b690f9STom Herbert 	s->last_pos		= 0;
2134f40c8174SDaniel Lezcano 	return 0;
2135f40c8174SDaniel Lezcano }
213673cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open);
2137f40c8174SDaniel Lezcano 
21386f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
21391da177e4SLinus Torvalds {
21401da177e4SLinus Torvalds 	int rc = 0;
21411da177e4SLinus Torvalds 	struct proc_dir_entry *p;
21421da177e4SLinus Torvalds 
21439427c4b3SDenis V. Lunev 	afinfo->seq_ops.start		= tcp_seq_start;
21449427c4b3SDenis V. Lunev 	afinfo->seq_ops.next		= tcp_seq_next;
21459427c4b3SDenis V. Lunev 	afinfo->seq_ops.stop		= tcp_seq_stop;
21469427c4b3SDenis V. Lunev 
214784841c3cSDenis V. Lunev 	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
214873cb88ecSArjan van de Ven 			     afinfo->seq_fops, afinfo);
214984841c3cSDenis V. Lunev 	if (!p)
21501da177e4SLinus Torvalds 		rc = -ENOMEM;
21511da177e4SLinus Torvalds 	return rc;
21521da177e4SLinus Torvalds }
21534bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register);
21541da177e4SLinus Torvalds 
/* Remove the /proc/net entry named afinfo->name from namespace net. */
21556f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
21561da177e4SLinus Torvalds {
2157ece31ffdSGao feng 	remove_proc_entry(afinfo->name, net->proc_net);
21581da177e4SLinus Torvalds }
21594bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister);
21601da177e4SLinus Torvalds 
2161d4f06873SEric Dumazet static void get_openreq4(const struct request_sock *req,
2162652586dfSTetsuo Handa 			 struct seq_file *f, int i, kuid_t uid)
21631da177e4SLinus Torvalds {
21642e6599cbSArnaldo Carvalho de Melo 	const struct inet_request_sock *ireq = inet_rsk(req);
2165fa76ce73SEric Dumazet 	long delta = req->rsk_timer.expires - jiffies;
21661da177e4SLinus Torvalds 
21675e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2168652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
21691da177e4SLinus Torvalds 		i,
2170634fb979SEric Dumazet 		ireq->ir_loc_addr,
2171d4f06873SEric Dumazet 		ireq->ir_num,
2172634fb979SEric Dumazet 		ireq->ir_rmt_addr,
2173634fb979SEric Dumazet 		ntohs(ireq->ir_rmt_port),
21741da177e4SLinus Torvalds 		TCP_SYN_RECV,
21751da177e4SLinus Torvalds 		0, 0, /* could print option size, but that is af dependent. */
21761da177e4SLinus Torvalds 		1,    /* timers active (only the expire timer) */
2177a399a805SEric Dumazet 		jiffies_delta_to_clock_t(delta),
2178e6c022a4SEric Dumazet 		req->num_timeout,
2179a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), uid),
21801da177e4SLinus Torvalds 		0,  /* non standard timer */
21811da177e4SLinus Torvalds 		0, /* open_requests have no inode */
2182d4f06873SEric Dumazet 		0,
2183652586dfSTetsuo Handa 		req);
21841da177e4SLinus Torvalds }
21851da177e4SLinus Torvalds 
2186652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
21871da177e4SLinus Torvalds {
21881da177e4SLinus Torvalds 	int timer_active;
21891da177e4SLinus Torvalds 	unsigned long timer_expires;
2190cf533ea5SEric Dumazet 	const struct tcp_sock *tp = tcp_sk(sk);
2191cf4c6bf8SIlpo Järvinen 	const struct inet_connection_sock *icsk = inet_csk(sk);
2192cf533ea5SEric Dumazet 	const struct inet_sock *inet = inet_sk(sk);
2193168a8f58SJerry Chu 	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
2194c720c7e8SEric Dumazet 	__be32 dest = inet->inet_daddr;
2195c720c7e8SEric Dumazet 	__be32 src = inet->inet_rcv_saddr;
2196c720c7e8SEric Dumazet 	__u16 destp = ntohs(inet->inet_dport);
2197c720c7e8SEric Dumazet 	__u16 srcp = ntohs(inet->inet_sport);
219849d09007SEric Dumazet 	int rx_queue;
21991da177e4SLinus Torvalds 
22006ba8a3b1SNandita Dukkipati 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
22016ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
22026ba8a3b1SNandita Dukkipati 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
22031da177e4SLinus Torvalds 		timer_active	= 1;
2204463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2205463c84b9SArnaldo Carvalho de Melo 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
22061da177e4SLinus Torvalds 		timer_active	= 4;
2207463c84b9SArnaldo Carvalho de Melo 		timer_expires	= icsk->icsk_timeout;
2208cf4c6bf8SIlpo Järvinen 	} else if (timer_pending(&sk->sk_timer)) {
22091da177e4SLinus Torvalds 		timer_active	= 2;
2210cf4c6bf8SIlpo Järvinen 		timer_expires	= sk->sk_timer.expires;
22111da177e4SLinus Torvalds 	} else {
22121da177e4SLinus Torvalds 		timer_active	= 0;
22131da177e4SLinus Torvalds 		timer_expires = jiffies;
22141da177e4SLinus Torvalds 	}
22151da177e4SLinus Torvalds 
221649d09007SEric Dumazet 	if (sk->sk_state == TCP_LISTEN)
221749d09007SEric Dumazet 		rx_queue = sk->sk_ack_backlog;
221849d09007SEric Dumazet 	else
221949d09007SEric Dumazet 		/*
222049d09007SEric Dumazet 		 * because we dont lock socket, we might find a transient negative value
222149d09007SEric Dumazet 		 */
222249d09007SEric Dumazet 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
222349d09007SEric Dumazet 
22245e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2225652586dfSTetsuo Handa 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2226cf4c6bf8SIlpo Järvinen 		i, src, srcp, dest, destp, sk->sk_state,
222747da8ee6SSridhar Samudrala 		tp->write_seq - tp->snd_una,
222849d09007SEric Dumazet 		rx_queue,
22291da177e4SLinus Torvalds 		timer_active,
2230a399a805SEric Dumazet 		jiffies_delta_to_clock_t(timer_expires - jiffies),
2231463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_retransmits,
2232a7cb5a49SEric W. Biederman 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
22336687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out,
2234cf4c6bf8SIlpo Järvinen 		sock_i_ino(sk),
2235cf4c6bf8SIlpo Järvinen 		atomic_read(&sk->sk_refcnt), sk,
22367be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_rto),
22377be87351SStephen Hemminger 		jiffies_to_clock_t(icsk->icsk_ack.ato),
2238463c84b9SArnaldo Carvalho de Melo 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
22391da177e4SLinus Torvalds 		tp->snd_cwnd,
2240168a8f58SJerry Chu 		sk->sk_state == TCP_LISTEN ?
2241168a8f58SJerry Chu 		    (fastopenq ? fastopenq->max_qlen : 0) :
2242652586dfSTetsuo Handa 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
22431da177e4SLinus Torvalds }
22441da177e4SLinus Torvalds 
2245cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2246652586dfSTetsuo Handa 			       struct seq_file *f, int i)
22471da177e4SLinus Torvalds {
2248789f558cSEric Dumazet 	long delta = tw->tw_timer.expires - jiffies;
224923f33c2dSAl Viro 	__be32 dest, src;
22501da177e4SLinus Torvalds 	__u16 destp, srcp;
22511da177e4SLinus Torvalds 
22521da177e4SLinus Torvalds 	dest  = tw->tw_daddr;
22531da177e4SLinus Torvalds 	src   = tw->tw_rcv_saddr;
22541da177e4SLinus Torvalds 	destp = ntohs(tw->tw_dport);
22551da177e4SLinus Torvalds 	srcp  = ntohs(tw->tw_sport);
22561da177e4SLinus Torvalds 
22575e659e4cSPavel Emelyanov 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2258652586dfSTetsuo Handa 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
22591da177e4SLinus Torvalds 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2260a399a805SEric Dumazet 		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2261652586dfSTetsuo Handa 		atomic_read(&tw->tw_refcnt), tw);
22621da177e4SLinus Torvalds }
22631da177e4SLinus Torvalds 
22641da177e4SLinus Torvalds #define TMPSZ 150
22651da177e4SLinus Torvalds 
22661da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v)
22671da177e4SLinus Torvalds {
22681da177e4SLinus Torvalds 	struct tcp_iter_state *st;
226905dbc7b5SEric Dumazet 	struct sock *sk = v;
22701da177e4SLinus Torvalds 
2271652586dfSTetsuo Handa 	seq_setwidth(seq, TMPSZ - 1);
22721da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
2273652586dfSTetsuo Handa 		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
22741da177e4SLinus Torvalds 			   "rx_queue tr tm->when retrnsmt   uid  timeout "
22751da177e4SLinus Torvalds 			   "inode");
22761da177e4SLinus Torvalds 		goto out;
22771da177e4SLinus Torvalds 	}
22781da177e4SLinus Torvalds 	st = seq->private;
22791da177e4SLinus Torvalds 
22801da177e4SLinus Torvalds 	switch (st->state) {
22811da177e4SLinus Torvalds 	case TCP_SEQ_STATE_LISTENING:
22821da177e4SLinus Torvalds 	case TCP_SEQ_STATE_ESTABLISHED:
228305dbc7b5SEric Dumazet 		if (sk->sk_state == TCP_TIME_WAIT)
2284652586dfSTetsuo Handa 			get_timewait4_sock(v, seq, st->num);
228505dbc7b5SEric Dumazet 		else
2286652586dfSTetsuo Handa 			get_tcp4_sock(v, seq, st->num);
22871da177e4SLinus Torvalds 		break;
22881da177e4SLinus Torvalds 	case TCP_SEQ_STATE_OPENREQ:
2289d4f06873SEric Dumazet 		get_openreq4(v, seq, st->num, st->uid);
22901da177e4SLinus Torvalds 		break;
22911da177e4SLinus Torvalds 	}
22921da177e4SLinus Torvalds out:
2293652586dfSTetsuo Handa 	seq_pad(seq, '\n');
22941da177e4SLinus Torvalds 	return 0;
22951da177e4SLinus Torvalds }
22961da177e4SLinus Torvalds 
/*
 * File operations for the TCP /proc entries: opened through tcp_seq_open(),
 * read and seeked through the generic seq_file helpers.
 */
229773cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = {
229873cb88ecSArjan van de Ven 	.owner   = THIS_MODULE,
229973cb88ecSArjan van de Ven 	.open    = tcp_seq_open,
230073cb88ecSArjan van de Ven 	.read    = seq_read,
230173cb88ecSArjan van de Ven 	.llseek  = seq_lseek,
230273cb88ecSArjan van de Ven 	.release = seq_release_net
230373cb88ecSArjan van de Ven };
230473cb88ecSArjan van de Ven 
/*
 * Description of the IPv4 "tcp" proc entry.  Only ->show is set here;
 * tcp_proc_register() fills in start/next/stop.
 */
23051da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = {
23061da177e4SLinus Torvalds 	.name		= "tcp",
23071da177e4SLinus Torvalds 	.family		= AF_INET,
230873cb88ecSArjan van de Ven 	.seq_fops	= &tcp_afinfo_seq_fops,
23099427c4b3SDenis V. Lunev 	.seq_ops	= {
23109427c4b3SDenis V. Lunev 		.show		= tcp4_seq_show,
23119427c4b3SDenis V. Lunev 	},
23121da177e4SLinus Torvalds };
23131da177e4SLinus Torvalds 
/* Per-netns init: register the IPv4 "tcp" proc entry in this namespace. */
23142c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net)
2315757764f6SPavel Emelyanov {
2316757764f6SPavel Emelyanov 	return tcp_proc_register(net, &tcp4_seq_afinfo);
2317757764f6SPavel Emelyanov }
2318757764f6SPavel Emelyanov 
/* Per-netns exit: remove the IPv4 "tcp" proc entry from this namespace. */
23192c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net)
2320757764f6SPavel Emelyanov {
2321757764f6SPavel Emelyanov 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2322757764f6SPavel Emelyanov }
2323757764f6SPavel Emelyanov 
/* Per-netns hooks that create and remove the IPv4 "tcp" proc entry. */
2324757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = {
2325757764f6SPavel Emelyanov 	.init = tcp4_proc_init_net,
2326757764f6SPavel Emelyanov 	.exit = tcp4_proc_exit_net,
2327757764f6SPavel Emelyanov };
2328757764f6SPavel Emelyanov 
/* Register tcp4_net_ops; returns the result of register_pernet_subsys(). */
23291da177e4SLinus Torvalds int __init tcp4_proc_init(void)
23301da177e4SLinus Torvalds {
2331757764f6SPavel Emelyanov 	return register_pernet_subsys(&tcp4_net_ops);
23321da177e4SLinus Torvalds }
23331da177e4SLinus Torvalds 
/* Undo tcp4_proc_init(): unregister tcp4_net_ops. */
23341da177e4SLinus Torvalds void tcp4_proc_exit(void)
23351da177e4SLinus Torvalds {
2336757764f6SPavel Emelyanov 	unregister_pernet_subsys(&tcp4_net_ops);
23371da177e4SLinus Torvalds }
23381da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
23391da177e4SLinus Torvalds 
/*
 * Protocol descriptor for TCP over IPv4.
 *
 * Binds the generic socket-layer operations to their TCP implementations
 * and points at the shared TCP accounting counters, sysctl limits, hash
 * tables and the timewait/request sock operation tables.  init/destroy
 * are the IPv4-specific tcp_v4_init_sock()/tcp_v4_destroy_sock().
 * The compat and cgroup hooks are only present when the corresponding
 * config options are enabled.
 */
23401da177e4SLinus Torvalds struct proto tcp_prot = {
23411da177e4SLinus Torvalds 	.name			= "TCP",
23421da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
23431da177e4SLinus Torvalds 	.close			= tcp_close,
23441da177e4SLinus Torvalds 	.connect		= tcp_v4_connect,
23451da177e4SLinus Torvalds 	.disconnect		= tcp_disconnect,
2346463c84b9SArnaldo Carvalho de Melo 	.accept			= inet_csk_accept,
23471da177e4SLinus Torvalds 	.ioctl			= tcp_ioctl,
23481da177e4SLinus Torvalds 	.init			= tcp_v4_init_sock,
23491da177e4SLinus Torvalds 	.destroy		= tcp_v4_destroy_sock,
23501da177e4SLinus Torvalds 	.shutdown		= tcp_shutdown,
23511da177e4SLinus Torvalds 	.setsockopt		= tcp_setsockopt,
23521da177e4SLinus Torvalds 	.getsockopt		= tcp_getsockopt,
23531da177e4SLinus Torvalds 	.recvmsg		= tcp_recvmsg,
23547ba42910SChangli Gao 	.sendmsg		= tcp_sendmsg,
23557ba42910SChangli Gao 	.sendpage		= tcp_sendpage,
23561da177e4SLinus Torvalds 	.backlog_rcv		= tcp_v4_do_rcv,
235746d3ceabSEric Dumazet 	.release_cb		= tcp_release_cb,
2358ab1e0a13SArnaldo Carvalho de Melo 	.hash			= inet_hash,
2359ab1e0a13SArnaldo Carvalho de Melo 	.unhash			= inet_unhash,
2360ab1e0a13SArnaldo Carvalho de Melo 	.get_port		= inet_csk_get_port,
23611da177e4SLinus Torvalds 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2362c9bee3b7SEric Dumazet 	.stream_memory_free	= tcp_stream_memory_free,
23631da177e4SLinus Torvalds 	.sockets_allocated	= &tcp_sockets_allocated,
23640a5578cfSArnaldo Carvalho de Melo 	.orphan_count		= &tcp_orphan_count,
23651da177e4SLinus Torvalds 	.memory_allocated	= &tcp_memory_allocated,
23661da177e4SLinus Torvalds 	.memory_pressure	= &tcp_memory_pressure,
2367a4fe34bfSEric W. Biederman 	.sysctl_mem		= sysctl_tcp_mem,
23681da177e4SLinus Torvalds 	.sysctl_wmem		= sysctl_tcp_wmem,
23691da177e4SLinus Torvalds 	.sysctl_rmem		= sysctl_tcp_rmem,
23701da177e4SLinus Torvalds 	.max_header		= MAX_TCP_HEADER,
23711da177e4SLinus Torvalds 	.obj_size		= sizeof(struct tcp_sock),
23723ab5aee7SEric Dumazet 	.slab_flags		= SLAB_DESTROY_BY_RCU,
23736d6ee43eSArnaldo Carvalho de Melo 	.twsk_prot		= &tcp_timewait_sock_ops,
237460236fddSArnaldo Carvalho de Melo 	.rsk_prot		= &tcp_request_sock_ops,
237539d8cda7SPavel Emelyanov 	.h.hashinfo		= &tcp_hashinfo,
23767ba42910SChangli Gao 	.no_autobind		= true,
2377543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT
2378543d9cfeSArnaldo Carvalho de Melo 	.compat_setsockopt	= compat_tcp_setsockopt,
2379543d9cfeSArnaldo Carvalho de Melo 	.compat_getsockopt	= compat_tcp_getsockopt,
2380543d9cfeSArnaldo Carvalho de Melo #endif
2381c255a458SAndrew Morton #ifdef CONFIG_MEMCG_KMEM
2382d1a4c0b3SGlauber Costa 	.init_cgroup		= tcp_init_cgroup,
2383d1a4c0b3SGlauber Costa 	.destroy_cgroup		= tcp_destroy_cgroup,
2384d1a4c0b3SGlauber Costa 	.proto_cgroup		= tcp_proto_cgroup,
2385d1a4c0b3SGlauber Costa #endif
23861da177e4SLinus Torvalds };
23874bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot);
23881da177e4SLinus Torvalds 
2389046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net)
2390046ee902SDenis V. Lunev {
2391bdbbb852SEric Dumazet 	int cpu;
2392bdbbb852SEric Dumazet 
2393bdbbb852SEric Dumazet 	for_each_possible_cpu(cpu)
2394bdbbb852SEric Dumazet 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2395bdbbb852SEric Dumazet 	free_percpu(net->ipv4.tcp_sk);
2396bdbbb852SEric Dumazet }
2397bdbbb852SEric Dumazet 
2398bdbbb852SEric Dumazet static int __net_init tcp_sk_init(struct net *net)
2399bdbbb852SEric Dumazet {
2400bdbbb852SEric Dumazet 	int res, cpu;
2401bdbbb852SEric Dumazet 
2402bdbbb852SEric Dumazet 	net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2403bdbbb852SEric Dumazet 	if (!net->ipv4.tcp_sk)
2404bdbbb852SEric Dumazet 		return -ENOMEM;
2405bdbbb852SEric Dumazet 
2406bdbbb852SEric Dumazet 	for_each_possible_cpu(cpu) {
2407bdbbb852SEric Dumazet 		struct sock *sk;
2408bdbbb852SEric Dumazet 
2409bdbbb852SEric Dumazet 		res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2410bdbbb852SEric Dumazet 					   IPPROTO_TCP, net);
2411bdbbb852SEric Dumazet 		if (res)
2412bdbbb852SEric Dumazet 			goto fail;
2413bdbbb852SEric Dumazet 		*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2414bdbbb852SEric Dumazet 	}
241549213555SDaniel Borkmann 
2416bdbbb852SEric Dumazet 	net->ipv4.sysctl_tcp_ecn = 2;
241749213555SDaniel Borkmann 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
241849213555SDaniel Borkmann 
2419b0f9ca53SFan Du 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
24206b58e0a5SFan Du 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
242105cbc0dbSFan Du 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2422bdbbb852SEric Dumazet 
242349213555SDaniel Borkmann 	return 0;
2424bdbbb852SEric Dumazet fail:
2425bdbbb852SEric Dumazet 	tcp_sk_exit(net);
2426bdbbb852SEric Dumazet 
2427bdbbb852SEric Dumazet 	return res;
2428b099ce26SEric W. Biederman }
2429b099ce26SEric W. Biederman 
2430b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2431b099ce26SEric W. Biederman {
2432b099ce26SEric W. Biederman 	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2433046ee902SDenis V. Lunev }
2434046ee902SDenis V. Lunev 
2435046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = {
2436046ee902SDenis V. Lunev        .init	   = tcp_sk_init,
2437046ee902SDenis V. Lunev        .exit	   = tcp_sk_exit,
2438b099ce26SEric W. Biederman        .exit_batch = tcp_sk_exit_batch,
2439046ee902SDenis V. Lunev };
2440046ee902SDenis V. Lunev 
24419b0f976fSDenis V. Lunev void __init tcp_v4_init(void)
24421da177e4SLinus Torvalds {
24435caea4eaSEric Dumazet 	inet_hashinfo_init(&tcp_hashinfo);
24446a1b3054SEric W. Biederman 	if (register_pernet_subsys(&tcp_sk_ops))
24451da177e4SLinus Torvalds 		panic("Failed to create the TCP control socket.\n");
24461da177e4SLinus Torvalds }
2447